diff --git a/.github/workflows/query-tests.yml b/.github/workflows/query-tests.yml new file mode 100644 index 0000000..4f11ede --- /dev/null +++ b/.github/workflows/query-tests.yml @@ -0,0 +1,45 @@ +name: Validate BigQuery examples + +# Dry-run validates every <!-- sqltest -->-decorated ```sql block in +# src/content/articles/ against live BigQuery schemas (free — no data scanned). +# +# Authenticates via Workload Identity Federation — no long-lived keys or +# repository secrets. The GitHub OIDC token is exchanged for credentials of +# the kb-query-tests service account in the measurement-lab project. +# Fork PRs cannot mint OIDC tokens, so the job skips for them. + +on: + workflow_dispatch: + pull_request: + paths: + - 'src/content/articles/**' + - 'scripts/test-queries.js' + - '.github/workflows/query-tests.yml' + +permissions: + id-token: write + contents: read + +jobs: + dry-run: + runs-on: ubuntu-latest + # id-token: write is not granted to pull_request runs from forks — + # skip rather than fail. + if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository + steps: + - uses: actions/checkout@v6 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + project_id: measurement-lab + workload_identity_provider: projects/808951263862/locations/global/workloadIdentityPools/github/providers/m-lab-repos + service_account: kb-query-tests@measurement-lab.iam.gserviceaccount.com + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 24 + + - name: Dry-run decorated queries + run: node scripts/test-queries.js diff --git a/README.md b/README.md index 1647cb1..a515b35 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,34 @@ Four variants: Wrap terms, column names, file paths, and short code snippets in backticks: `` `download_p50` ``, `` `country_code` ``. The site renders them as gray rounded pills — no extra decoration needed. 
+## Testing BigQuery examples + +SQL code blocks in articles can be validated against the live BigQuery schemas using free dry-run queries — no data is scanned and nothing is billed. This catches syntax errors and stale column paths (e.g. a field that moved between the `ndt.tcpinfo` view and raw tables) before an article ships. + +**Annotating a block.** Testing is opt-in per block. Put an HTML comment on the line directly above the fence: + +````markdown +<!-- sqltest --> +```sql +SELECT ... FROM `measurement-lab.ndt.ndt7` ... +``` +```` + +A decorated block must be a complete, runnable GoogleSQL query. To permanently exclude a block that looks like SQL but isn't runnable (templates, pseudo-code, deliberately broken examples), use `<!-- sqltest: skip -->` instead. Undecorated blocks are ignored. + +**Running locally.** You need gcloud Application Default Credentials and a default project (queries are dry-run only, so any project with the BigQuery API enabled works): + +```bash +gcloud auth application-default login +npm run test:queries # all decorated blocks, all articles +npm run test:queries -- --file tcpinfo # one article (filename substring) +npm run test:queries -- --all # every ```sql block, decorated or not +``` + +Each block reports pass/fail with its file, line number, and estimated bytes the query would scan. Without credentials the script skips with a warning and exits 0, so it never blocks contributors or builds. + +**In CI.** `.github/workflows/query-tests.yml` runs the decorated-block validation on any PR touching `src/content/articles/`. It authenticates via Workload Identity Federation, exchanging the workflow's GitHub OIDC token for short-lived credentials of the `kb-query-tests` service account in the `measurement-lab` project — no stored keys or repository secrets. PRs from forks can't mint OIDC tokens, so the job skips for them instead of failing. + ## Deploy to GitHub Pages 1. 
Push to GitHub diff --git a/package.json b/package.json index 5207b3a..6b3dbcb 100644 --- a/package.json +++ b/package.json @@ -6,7 +6,8 @@ "dev": "astro dev", "build": "astro build", "preview": "astro preview", - "todo": "node scripts/todo.js" + "todo": "node scripts/todo.js", + "test:queries": "node scripts/test-queries.js" }, "dependencies": { "@astrojs/svelte": "^7.0.0", diff --git a/scripts/test-queries.js b/scripts/test-queries.js new file mode 100644 index 0000000..f964051 --- /dev/null +++ b/scripts/test-queries.js @@ -0,0 +1,165 @@ +#!/usr/bin/env node +// npm run test:queries [-- --file tcpinfo-snapshot-analysis.md] [-- --all] +// +// Validates ```sql code blocks in article markdown against BigQuery using a +// dry run. Dry runs are free, scan no data, and verify both GoogleSQL syntax +// and every column path against the live table schema — catching stale column +// references without maintaining a schema copy. +// +// Opt-in by decoration: only blocks with an HTML comment on the line above +// the fence are tested (this is what CI runs): +// <!-- sqltest --> validate this block with a dry run +// <!-- sqltest: skip --> never test this block (templates, pseudo-SQL) +// Pass --all to test every ```sql block regardless of decoration. +// +// Optional by design: requires gcloud Application Default Credentials. +// gcloud auth application-default login +// If credentials or gcloud are missing, the script skips (exit 0) so builds +// and contributors without GCP access are unaffected. 
import { readFileSync, readdirSync } from 'fs';
import { join } from 'path';
import { execFileSync } from 'child_process';
import { fileURLToPath } from 'url';

const __dirname = fileURLToPath(new URL('.', import.meta.url));
const articlesDir = join(__dirname, '../src/content/articles');

// ANSI escape sequences used to style terminal output.
const reset = '\x1b[0m';
const bold = '\x1b[1m';
const dim = '\x1b[2m';
const red = '\x1b[31m';
const green = '\x1b[32m';
const yellow = '\x1b[33m';
const cyan = '\x1b[36m';

// ---------- CLI args ----------
const args = process.argv.slice(2);
let fileFilter = null;
const fileIdx = args.indexOf('--file');
if (fileIdx !== -1) {
  const value = args[fileIdx + 1];
  // A bare `--file` (or `--file --all`) used to fall through and either test
  // every article or filter on the next flag; treat it as a usage error.
  if (value === undefined || value.startsWith('--')) {
    console.error(`${red}--file requires a filename substring, e.g. --file tcpinfo${reset}`);
    process.exit(2);
  }
  fileFilter = value;
}
const testAll = args.includes('--all');

// ---------- extract ```sql blocks ----------

// Decoration comment that opts a block in: `<!-- sqltest -->`, optionally with
// a mode after a colon (`<!-- sqltest: skip -->`). Group 1 is the mode, if any.
const DECORATION_RE = /<!--\s*sqltest(?::\s*([\w-]+))?\s*-->/;

/**
 * Collect every fenced ```sql block from one markdown document.
 *
 * @param {string} src   Full markdown text of the article.
 * @param {string} fname File name, stored on each block for reporting.
 * @returns {{file: string, line: number, sql: string, label: string, ann: (string|null)}[]}
 *   One entry per non-empty block. `line` is the 1-based line of the first SQL
 *   line. `label` is the text of the first `-- ` comment line in the SQL, or a
 *   positional fallback. `ann` is the decoration mode ('dry-run' when the
 *   comment carries no explicit mode, e.g. 'skip' otherwise), or null when the
 *   nearest non-blank line above the fence is not a decoration comment.
 */
function extractSqlBlocks(src, fname) {
  // Split on LF or CRLF so SQL from Windows-edited articles carries no '\r'.
  const lines = src.split(/\r?\n/);
  const blocks = [];
  let inBlock = false;
  let buf = [];
  let startLine = 0;
  let ann = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmed = line.trim();

    if (!inBlock && trimmed === '```sql') {
      inBlock = true;
      startLine = i + 2; // first SQL line (1-based)
      buf = [];
      // Look back past blank lines for a decoration comment.
      let j = i - 1;
      while (j >= 0 && lines[j].trim() === '') j--;
      const m = j >= 0 ? lines[j].match(DECORATION_RE) : null;
      ann = m ? (m[1] || 'dry-run') : null;
      continue;
    }

    if (inBlock && trimmed === '```') {
      inBlock = false;
      const sql = buf.join('\n').trim();
      const label = sql.match(/^--\s*(.+)$/m)?.[1] || `block at :${startLine}`;
      // Empty fences are dropped: there is nothing to dry-run.
      if (sql) blocks.push({ file: fname, line: startLine, sql, label, ann });
      continue;
    }

    if (inBlock) buf.push(line);
  }
  return blocks;
}

// Article files (markdown / MDX), optionally narrowed by --file, in stable order.
const files = readdirSync(articlesDir)
  .filter((f) => f.endsWith('.md') || f.endsWith('.mdx'))
  .filter((f) => !fileFilter || f.includes(fileFilter))
  .sort();

const allBlocks = files.flatMap((f) =>
  extractSqlBlocks(readFileSync(join(articlesDir, f), 'utf8'), f)
);

// Default: only decorated blocks. --all: everything except sqltest: skip.
+const blocks = allBlocks.filter((b) => + b.ann === 'skip' ? false : testAll ? true : b.ann !== null +); +const undecorated = allBlocks.length - blocks.length; + +if (blocks.length === 0) { + console.log( + `No testable \`\`\`sql blocks found${fileFilter ? ` matching --file ${fileFilter}` : ''}` + + (undecorated ? ` (${undecorated} undecorated — add or pass --all)` : '') + + '.' + ); + process.exit(0); +} + +// ---------- credentials (optional) ---------- +function gcloud(cmdArgs) { + try { + return execFileSync('gcloud', cmdArgs, { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim(); + } catch { + return null; + } +} + +const token = gcloud(['auth', 'application-default', 'print-access-token']); +if (!token) { + console.log(`${yellow}⚠ Skipping query tests: no Application Default Credentials.${reset}`); + console.log(`${dim} Run: gcloud auth application-default login${reset}`); + process.exit(0); +} + +const project = + process.env.GOOGLE_CLOUD_PROJECT || + process.env.GCLOUD_PROJECT || + process.env.CLOUDSDK_CORE_PROJECT || + gcloud(['config', 'get-value', 'project']); +if (!project || project === '(unset)') { + console.log(`${yellow}⚠ Skipping query tests: no GCP project set.${reset}`); + console.log(`${dim} Set GOOGLE_CLOUD_PROJECT or: gcloud config set project ${reset}`); + process.exit(0); +} + +// ---------- dry-run each block ---------- +async function dryRun(sql) { + const res = await fetch( + `https://bigquery.googleapis.com/bigquery/v2/projects/${project}/queries`, + { + method: 'POST', + headers: { Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, + body: JSON.stringify({ query: sql, dryRun: true, useLegacySql: false }), + } + ); + const body = await res.json(); + if (!res.ok) throw new Error(body.error?.message || `HTTP ${res.status}`); + return Number(body.totalBytesProcessed || 0); +} + +function fmtBytes(n) { + if (n >= 1e12) return (n / 1e12).toFixed(2) + ' TB'; + if (n >= 1e9) return (n / 1e9).toFixed(2) + ' GB'; + 
if (n >= 1e6) return (n / 1e6).toFixed(2) + ' MB'; + return n + ' B'; +} + +console.log(`${bold}M-Lab KB — BigQuery query tests${reset} ${dim}(dry run, project: ${project})${reset}\n`); + +let failed = 0, lastFile = null; +for (const b of blocks) { + if (b.file !== lastFile) { + console.log(` ${cyan}${b.file}${reset}`); + lastFile = b.file; + } + try { + const bytes = await dryRun(b.sql); + console.log(` ${green}✓${reset}${dim}:${b.line}${reset} ${b.label} ${dim}(would scan ${fmtBytes(bytes)} if run — dry run is free)${reset}`); + } catch (err) { + failed++; + console.log(` ${red}✗${reset}${dim}:${b.line}${reset} ${b.label}`); + console.log(` ${red}${err.message}${reset}`); + } +} + +const passed = blocks.length - failed; +console.log( + `\n${bold}${failed ? red : green}${passed} passed${reset}${dim}, ${failed} failed` + + (undecorated ? `, ${undecorated} undecorated/skipped` : '') + + reset +); +process.exit(failed ? 1 : 0); diff --git a/src/content/articles/byos-overview.md b/src/content/articles/byos-overview.md index 5f91295..20d618c 100644 --- a/src/content/articles/byos-overview.md +++ b/src/content/articles/byos-overview.md @@ -76,13 +76,14 @@ Measurements from BYOS nodes flow through M-Lab's standard data pipeline: - Data is processed and published to BigQuery within ~24 hours - You can filter for your node's data using the `server.Site` field in BigQuery: + ```sql +-- Query a specific site-id for BYOS test data SELECT a.TestTime, a.MeanThroughputMbps, client.Network.ASName FROM `measurement-lab.ndt.ndt7` WHERE server.Site = 'your-site-id' - AND DATE(a.TestTime) >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) + AND date >= DATE_SUB(CURRENT_DATE(), INTERVAL 7 DAY) ORDER BY a.TestTime DESC -LIMIT 50 ``` ## Monitoring Your Node diff --git a/src/content/articles/core-service-tcp-info.md b/src/content/articles/core-service-tcp-info.md index d2b5843..a5d8756 100644 --- a/src/content/articles/core-service-tcp-info.md +++ b/src/content/articles/core-service-tcp-info.md 
@@ -46,18 +46,19 @@ TCP INFO data uses M-Lab's standard column format as described in the [Long Term **Joining TCP INFO with NDT results:** + ```sql +-- Joining TCP info with NDT results SELECT ndt.a.TestTime, ndt.a.MeanThroughputMbps, ndt.client.Geo.CountryCode, - tcpinfo.MinRTT, - tcpinfo.SndCwnd + tcpinfo.a.FinalSnapshot.tcpinfo.MinRTT, + tcpinfo.a.FinalSnapshot.tcpinfo.SndCwnd FROM `measurement-lab.ndt.ndt7` AS ndt JOIN `measurement-lab.ndt.tcpinfo` AS tcpinfo ON ndt.id = tcpinfo.id -WHERE DATE(ndt.a.TestTime) = '2024-06-01' -LIMIT 1000 +WHERE ndt.date = '2024-06-01' and tcpinfo.date = '2024-06-01' ``` diff --git a/src/content/articles/core-service-traceroute.md b/src/content/articles/core-service-traceroute.md index 6d026a4..890dfd4 100644 --- a/src/content/articles/core-service-traceroute.md +++ b/src/content/articles/core-service-traceroute.md @@ -70,22 +70,25 @@ Both use the `id` field (the flow UUID) to join with corresponding measurement t **Join NDT result with its forward traceroute:** + ```sql +-- Join NDT results with forward traceroute SELECT ndt.a.TestTime, ndt.a.MeanThroughputMbps, ndt.client.Geo.CountryCode, tr.raw.Tracelb.Dst AS traceroute_destination -FROM `measurement-lab.ndt.ndt7_union` AS ndt +FROM `measurement-lab.ndt.ndt7` AS ndt JOIN `measurement-lab.ndt_raw.scamper1` AS tr ON ndt.id = tr.id -WHERE DATE(ndt.a.TestTime) = '2024-06-01' -LIMIT 100 +WHERE ndt.date = '2024-06-01' and tr.date = '2024-06-01' ``` **Extract individual hops** (traceroute uses a deeply nested schema — UNNEST to work with individual hop data): + ```sql +-- Extract individual hops SELECT a.StartTime, a.Source.IP AS server_ip, @@ -95,10 +98,10 @@ SELECT rtt.RTT AS hop_rtt_ms FROM `measurement-lab.ndt_raw.scamper1`, UNNEST(raw.Tracelb.Nodes) AS hop, - UNNEST(hop.Links) AS link, - UNNEST(link.Probes) AS probe, - UNNEST(probe.Replies) AS rtt -WHERE DATE(a.StartTime) = '2024-01-15' + UNNEST(raw.Tracelb.nodes.Links) AS link, + UNNEST(link.links.Probes) AS probe, + 
UNNEST(link.links.Probes.Replies) AS rtt +WHERE date = '2024-01-15' LIMIT 100 ``` diff --git a/src/content/articles/getting-started-bigquery.md b/src/content/articles/getting-started-bigquery.md index 10fc89c..d531e92 100644 --- a/src/content/articles/getting-started-bigquery.md +++ b/src/content/articles/getting-started-bigquery.md @@ -58,13 +58,15 @@ For most use cases, start with `measurement-lab.ndt.ndt7_union`. Average download speed by country for the last 30 days: + ```sql +-- Average download speed by country for the last 30 days SELECT client.Geo.CountryCode AS country, ROUND(AVG(a.MeanThroughputMbps), 2) AS avg_download_mbps, COUNT(*) AS test_count -FROM `measurement-lab.ndt.ndt7_union` -WHERE DATE(a.TestTime) >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) +FROM `measurement-lab.ndt.ndt7` +WHERE date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) AND a.MeanThroughputMbps > 0 AND a.MeanThroughputMbps < 10000 -- exclude outliers GROUP BY country @@ -129,7 +131,9 @@ Use the **preview** feature in the BigQuery UI to inspect data before running qu For larger analyses, export to Google Cloud Storage rather than downloading from BigQuery: + ```sql +-- Export example EXPORT DATA OPTIONS ( uri = 'gs://your-bucket/ndt7-export-*.csv', @@ -138,8 +142,8 @@ EXPORT DATA ) AS ( SELECT a.TestTime, a.MeanThroughputMbps, client.Geo.CountryCode - FROM `measurement-lab.ndt.ndt7_union` - WHERE DATE(a.TestTime) = '2024-06-01' + FROM `measurement-lab.ndt.ndt7` + WHERE date = '2024-06-01' ); ``` diff --git a/src/content/articles/getting-started-isp-ixp.md b/src/content/articles/getting-started-isp-ixp.md index 32a9683..01cb734 100644 --- a/src/content/articles/getting-started-isp-ixp.md +++ b/src/content/articles/getting-started-isp-ixp.md @@ -30,14 +30,16 @@ Every test run against an M-Lab server includes the client's IP address and ASN **Sample query — performance summary for your ASN:** + ```sql +-- Performance of your ASN SELECT ROUND(APPROX_QUANTILES(a.MeanThroughputMbps, 
100)[OFFSET(50)], 2) AS median_download_mbps, ROUND(APPROX_QUANTILES(a.MinRTT, 100)[OFFSET(50)], 2) AS median_rtt_ms, COUNT(*) AS test_count -FROM `measurement-lab.ndt.ndt7_union` -WHERE DATE(a.TestTime) >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) - AND client.Network.ASNumber = +FROM `measurement-lab.ndt.ndt7` +WHERE date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) + AND client.Network.ASNumber = 12345 AND a.MeanThroughputMbps > 0 ``` diff --git a/src/content/articles/internet-quality-beyond-speed.md b/src/content/articles/internet-quality-beyond-speed.md index ec17678..c7aacf4 100644 --- a/src/content/articles/internet-quality-beyond-speed.md +++ b/src/content/articles/internet-quality-beyond-speed.md @@ -31,12 +31,14 @@ A 1 Gbps connection with 200 ms latency or 2% packet loss will perform worse for **Working latency** (sometimes called "loaded latency") is the round-trip time during a data transfer. When a connection is busy sending or receiving data, latency often spikes dramatically — this is the real-world experience of lag during uploads or video calls. 
In M-Lab NDT7 data: + ```sql +-- NDT7 Data Query SELECT a.MinRTT, -- best-case (unloaded) latency in ms a.MeanThroughputMbps FROM `measurement-lab.ndt.ndt7` -WHERE DATE(a.TestTime) = '2024-06-01' +WHERE date = '2024-06-01' ``` ### Packet Loss diff --git a/src/content/articles/mlab-annotations-explained.md b/src/content/articles/mlab-annotations-explained.md index bb469b3..6d85992 100644 --- a/src/content/articles/mlab-annotations-explained.md +++ b/src/content/articles/mlab-annotations-explained.md @@ -61,15 +61,16 @@ M-Lab’s built-in geolocation is appropriate for coarse spatial summaries, but M-Lab uses ISO 3166-2 codes for subdivisions such as states and provinces: + ```sql -- US state-level analysis SELECT client.Geo.Region AS state_code, COUNT(*) AS tests, ROUND(AVG(a.MeanThroughputMbps), 2) AS avg_mbps -FROM `measurement-lab.ndt.ndt7_union` +FROM `measurement-lab.ndt.ndt7` WHERE client.Geo.CountryCode = 'US' - AND DATE(a.TestTime) BETWEEN '2024-01-01' AND '2024-12-31' + AND date BETWEEN '2024-01-01' AND '2024-12-31' GROUP BY state_code ORDER BY tests DESC; ### Improving Spatial Precision @@ -93,6 +94,7 @@ IP-to-AS mappings are derived from routing data and can be affected by route vis The accompanying `ASName` field provides a human-readable name for that ASN. This name is useful for display and interpretation, but it should not be treated as a stable identifier. For ISP comparisons, prefer `ASNumber` over `ASName`. AS names can change due to mergers and rebranding, whereas ASNs are more stable identifiers. Even so, some organizations operate multiple ASNs, and some ASNs contain multiple brands or customer populations. 
+ ```sql -- Top ISPs by test volume in a country SELECT @@ -100,9 +102,9 @@ SELECT MAX(client.Network.ASName) AS isp_name, -- stable within ASN COUNT(*) AS test_count, ROUND(APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(50)], 2) AS median_mbps -FROM `measurement-lab.ndt.ndt7_union` +FROM `measurement-lab.ndt.ndt7` WHERE client.Geo.CountryCode = 'BR' - AND DATE(a.TestTime) BETWEEN '2024-01-01' AND '2024-03-31' + AND date BETWEEN '2024-01-01' AND '2024-03-31' GROUP BY asn HAVING test_count > 1000 ORDER BY test_count DESC diff --git a/src/content/articles/research-guide.md b/src/content/articles/research-guide.md index bd61eb1..c2d30f8 100644 --- a/src/content/articles/research-guide.md +++ b/src/content/articles/research-guide.md @@ -45,7 +45,9 @@ For exploratory analysis and visualization without writing SQL, the [M-Lab Obser ### ISP Performance Comparison + ```sql +-- ISP Performance Comparison SELECT client.Network.ASName AS isp, ROUND(APPROX_QUANTILES(a.MeanThroughputMbps, 100)[OFFSET(50)], 2) @@ -54,24 +56,25 @@ SELECT AS median_rtt_ms, COUNT(*) AS test_count FROM `measurement-lab.ndt.ndt7` -WHERE DATE(a.TestTime) BETWEEN '2024-01-01' AND '2024-03-31' +WHERE date BETWEEN '2024-01-01' AND '2024-03-31' AND client.Geo.CountryCode = 'US' AND a.MeanThroughputMbps > 0 GROUP BY isp HAVING test_count > 10000 ORDER BY median_download_mbps DESC -LIMIT 25 ``` ### Geographic Coverage Analysis + ```sql +-- Geographic coverage analysis SELECT client.Geo.Region AS region, COUNT(*) AS test_count, ROUND(AVG(a.MeanThroughputMbps), 2) AS avg_mbps FROM `measurement-lab.ndt.ndt7` -WHERE DATE(a.TestTime) BETWEEN '2024-01-01' AND '2024-12-31' +WHERE date BETWEEN '2024-01-01' AND '2024-12-31' AND client.Geo.CountryCode = 'US' GROUP BY region ORDER BY test_count DESC @@ -79,13 +82,15 @@ ORDER BY test_count DESC ### Temporal Trend Analysis + ```sql +-- Temporal trend analysis SELECT DATE_TRUNC(DATE(a.TestTime), MONTH) AS month, ROUND(APPROX_QUANTILES(a.MeanThroughputMbps, 
100)[OFFSET(50)], 2) AS median_mbps FROM `measurement-lab.ndt.ndt7` -WHERE DATE(a.TestTime) BETWEEN '2020-01-01' AND '2024-12-31' +WHERE date BETWEEN '2020-01-01' AND '2024-12-31' AND client.Network.ASNumber = 7922 -- Comcast as example AND a.MeanThroughputMbps > 0 GROUP BY month diff --git a/src/content/articles/test-ndt.md b/src/content/articles/test-ndt.md index 779e4ba..e65ee17 100644 --- a/src/content/articles/test-ndt.md +++ b/src/content/articles/test-ndt.md @@ -91,23 +91,26 @@ The current recommended view for general use is `measurement-lab.ndt.ndt7_union` **A simple first query** — average download speed by country over the last 30 days: + ```sql +-- Average download speed by country over past 30-days SELECT client.Geo.CountryCode AS country, ROUND(AVG(a.MeanThroughputMbps), 2) AS avg_download_mbps, COUNT(*) AS test_count -FROM `measurement-lab.ndt.ndt7_union` -WHERE DATE(a.TestTime) >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) +FROM `measurement-lab.ndt.ndt7` +WHERE date >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) AND a.MeanThroughputMbps > 0 AND a.MeanThroughputMbps < 10000 GROUP BY country ORDER BY avg_download_mbps DESC -LIMIT 20 ``` **Key fields for analysis:** + ```sql +-- Key fields SELECT a.TestTime, a.MeanThroughputMbps, -- download speed in Mbps @@ -117,10 +120,9 @@ SELECT client.Geo.Region, client.Network.ASNumber, client.Network.ASName -FROM `measurement-lab.ndt.ndt7_union` -WHERE DATE(a.TestTime) = '2024-01-01' +FROM `measurement-lab.ndt.ndt7` +WHERE date = '2024-01-01' AND a.MeanThroughputMbps > 0 -LIMIT 100 ``` Always filter by `DATE(a.TestTime)` to use BigQuery's partition pruning — without it you'll scan the entire multi-terabyte table. 
diff --git a/src/content/articles/test-reverse-traceroute.md b/src/content/articles/test-reverse-traceroute.md index c9f2706..c477665 100644 --- a/src/content/articles/test-reverse-traceroute.md +++ b/src/content/articles/test-reverse-traceroute.md @@ -76,7 +76,9 @@ The primary table is `measurement-lab.revtr_raw.revtr1`. Additional supporting t **Query 1 — High-quality reverse paths only** (reaches destination, no interdomain symmetry, no type-4-induced loops): + ```sql +-- High quality reverse paths only SELECT date, raw FROM `measurement-lab.revtr_raw.revtr1` WHERE DATE(date) = '2024-06-01' @@ -99,7 +101,9 @@ WHERE DATE(date) = '2024-06-01' **Query 2 — Failure breakdown by reason:** + ```sql +-- Failure breakdown by reason SELECT DATE(date) AS day, raw.fail_reason, COUNT(*) AS count FROM `measurement-lab.revtr_raw.revtr1` WHERE date BETWEEN '2024-06-01' AND '2024-06-07' @@ -109,7 +113,9 @@ ORDER BY day, count DESC **Query 3 — Join forward and reverse paths for a given measurement:** + ```sql +-- Join forward and reverse paths SELECT t1.raw AS revtr_data, t2.raw AS trace_data FROM `measurement-lab.revtr_raw.revtr1` AS t1 CROSS JOIN UNNEST(t1.raw.revtr_hops) AS hop diff --git a/src/content/articles/test-wehe.md b/src/content/articles/test-wehe.md index 961cd5c..f420fa7 100644 --- a/src/content/articles/test-wehe.md +++ b/src/content/articles/test-wehe.md @@ -57,7 +57,9 @@ Raw WeHe data is available at: **Find tests where differentiation was detected on cellular networks (last 7 days):** + ```sql +-- Find tests where differentiation was detected on cellular networks WITH info AS ( SELECT raw.* FROM `measurement-lab.wehe_raw.replayInfo1` @@ -83,7 +85,9 @@ LIMIT 1000 **Count tests and differentiation detections per carrier (last 7 days):** + ```sql +-- Count tests and differentiation detections per carrier WITH info AS ( SELECT raw.* FROM `measurement-lab.wehe_raw.replayInfo1`