diff --git a/docs/database-query-observability.md b/docs/database-query-observability.md new file mode 100644 index 00000000..c3bc25cf --- /dev/null +++ b/docs/database-query-observability.md @@ -0,0 +1,69 @@ +# Database query observability + +Maple gives every database call first-class treatment using the standard +[OpenTelemetry database semantic conventions](https://opentelemetry.io/docs/specs/semconv/db/database-spans/). +If your services are instrumented with an OTel-aware database client — which most +SDKs enable automatically — you get, with **no extra configuration**: + +- **Query timing inline in every trace.** A database call's client span shows up + in the waterfall like any other span, and its detail panel renders a database + summary block (system, namespace, table, operation, rows returned, server) + derived from the `db.*` attributes. +- **A cross-service Queries surface.** Every distinct query *shape* (the query + with literals normalized to `?`) is aggregated across services with call + volume, error rate, and p50/p95/p99 latency, so you can find your slowest and + busiest queries and drill straight to sample traces. + +This works for **any** database — PostgreSQL, MySQL, ClickHouse, Redis, MongoDB, +and more — because it reads only the vendor-neutral semantic conventions. + +## Attributes Maple reads + +| Attribute | Used for | +| --- | --- | +| `db.system.name` (legacy `db.system`) | Identifies the database; drives the summary block and per-system grouping. | +| `db.query.text` (legacy `db.statement`) | The query; normalized into a low-cardinality **shape** for grouping. | +| `db.query.summary` | Preferred human label for a query shape (e.g. `SELECT users`). | +| `db.operation.name`, `db.collection.name`, `db.namespace` | Compose a label when `db.query.summary` is absent. | +| `db.query.fingerprint` (legacy `db.statement.fingerprint`) | Explicit grouping key when the instrumentation provides one. 
| +| `db.response.returned_rows` | Rows returned, shown in the span summary. | +| `db.operation.batch.size` | Batch size (only present for batches). | +| `server.address` / `server.port` | The database endpoint. | +| `error.type`, `db.response.status_code` | Failure outcome. | + +Query text is grouped by *shape*: literals are stripped to `?` and `IN (...)` +lists are collapsed, so `WHERE id = 1` and `WHERE id = 2` are the same shape. +Prefer emitting parameterized `db.query.text` (the OTel spec says parameterized +text should **not** be sanitized) so shapes stay clean. + +## Correlating server-side query logs with traces (SQLCommenter) + +The client span above captures the query *as the caller sees it* — duration and +the query text — but it cannot see server-side detail such as memory used or +rows/bytes scanned. To bridge that gap, tag your queries with **SQLCommenter**, +the OpenTelemetry-standard way to propagate trace context into the database by +appending a comment to the query: + +```sql +SELECT * FROM events WHERE ts > ? /*traceparent='00-<trace-id>-<span-id>-01'*/ +``` + +Most OTel database instrumentations can inject this for you (it is opt-in — see +your SDK's SQLCommenter / "DB statement comment" option). Because the database +records the full query text — comment included — in its query log, Maple can +read that log back (see the ClickHouse integration) and stitch each server-side +query to the exact client span that issued it, nesting it as a child in the +trace. + +> Note: a per-query `traceparent` makes SQLCommenter comments high-cardinality, +> which can hurt performance for MySQL prepared statements, Oracle, and SQL +> Server; consult the OTel guidance before enabling it broadly on those engines. + +## Resource allocation (ClickHouse) + +Server-side **resource allocation** — peak memory, rows/bytes read, CPU time, +ProfileEvents — is not available from client spans. 
For ClickHouse, connect your +cluster via the ClickHouse integration: Maple polls `system.query_log`, forwards +each sampled query into Maple as a span (nested under your app's trace via the +SQLCommenter `traceparent` above) plus aggregate metrics, so query timing and +resource allocation land alongside your existing traces and dashboards. diff --git a/packages/domain/package.json b/packages/domain/package.json index a08c8318..5d6e88be 100644 --- a/packages/domain/package.json +++ b/packages/domain/package.json @@ -10,6 +10,7 @@ "./primitives": "./src/primitives.ts", "./query-engine": "./src/query-engine.ts", "./recommendations": "./src/recommendations.ts", + "./sqlcommenter": "./src/sqlcommenter.ts", "./tinybird-project-sync": "./src/tinybird/project-sync.ts", "./warehouse-queries": "./src/warehouse-queries.ts", "./tinybird": "./src/tinybird/index.ts", diff --git a/packages/domain/src/sqlcommenter.test.ts b/packages/domain/src/sqlcommenter.test.ts new file mode 100644 index 00000000..042db224 --- /dev/null +++ b/packages/domain/src/sqlcommenter.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from "vitest" +import { parseSqlCommenterTraceparent } from "./sqlcommenter" + +const TRACE_ID = "0af7651916cd43dd8448eb211c80319c" +const SPAN_ID = "b7ad6b7169203331" + +describe("parseSqlCommenterTraceparent", () => { + it("extracts trace context from a trailing SQLCommenter comment", () => { + const sql = `SELECT * FROM songs WHERE id = ? 
/*traceparent='00-${TRACE_ID}-${SPAN_ID}-01'*/` + expect(parseSqlCommenterTraceparent(sql)).toEqual({ + traceId: TRACE_ID, + spanId: SPAN_ID, + flags: "01", + sampled: true, + }) + }) + + it("reads the unsampled flag (00)", () => { + const sql = `SELECT 1 /*traceparent='00-${TRACE_ID}-${SPAN_ID}-00'*/` + expect(parseSqlCommenterTraceparent(sql)?.sampled).toBe(false) + }) + + it("finds the comment alongside other sqlcommenter keys", () => { + const sql = `SELECT 1 /*db_driver='clickhouse',traceparent='00-${TRACE_ID}-${SPAN_ID}-01',route='%2Fusers'*/` + expect(parseSqlCommenterTraceparent(sql)?.traceId).toBe(TRACE_ID) + }) + + it("is tolerant of a URL-encoded value, extra whitespace, and uppercase hex", () => { + const sql = `SELECT 1 /* traceparent = '00-${TRACE_ID.toUpperCase()}-${SPAN_ID.toUpperCase()}-01' */` + expect(parseSqlCommenterTraceparent(sql)?.spanId).toBe(SPAN_ID) + }) + + it("returns null when there is no traceparent comment", () => { + expect(parseSqlCommenterTraceparent("SELECT * FROM songs")).toBeNull() + expect(parseSqlCommenterTraceparent("")).toBeNull() + expect(parseSqlCommenterTraceparent(null)).toBeNull() + expect(parseSqlCommenterTraceparent(undefined)).toBeNull() + }) + + it("rejects a malformed traceparent (wrong lengths / all-zero ids)", () => { + expect(parseSqlCommenterTraceparent("SELECT 1 /*traceparent='00-tooshort-abc-01'*/")).toBeNull() + expect( + parseSqlCommenterTraceparent( + `SELECT 1 /*traceparent='00-${"0".repeat(32)}-${SPAN_ID}-01'*/`, + ), + ).toBeNull() + expect( + parseSqlCommenterTraceparent( + `SELECT 1 /*traceparent='00-${TRACE_ID}-${"0".repeat(16)}-01'*/`, + ), + ).toBeNull() + }) +}) diff --git a/packages/domain/src/sqlcommenter.ts b/packages/domain/src/sqlcommenter.ts new file mode 100644 index 00000000..1110f277 --- /dev/null +++ b/packages/domain/src/sqlcommenter.ts @@ -0,0 +1,73 @@ +/** + * SQLCommenter (https://google.github.io/sqlcommenter/) trace-context parsing. 
+ * + * SQLCommenter — now merged into the OpenTelemetry specification as the standard + * way to correlate database queries with APM traces — propagates trace context + * into a database by appending a machine-readable comment to the query text: + * + * SELECT * FROM songs WHERE id = ? /​*traceparent='00-<trace-id>-<span-id>-01'*​/ + * + * The database records the full query (comment included) in its query log + * (e.g. ClickHouse `system.query_log`), so reading that log back lets us stitch + * a server-side query row to the client span that issued it — nesting the + * server-side query as a child of the app's DB span. + * + * This module extracts the W3C `traceparent` from such a comment. Pure string + * parsing, no imports, so it is safe to pull into the web / cli / scraper + * bundles alike. + */ + +/** The W3C trace-context fields carried by a `traceparent`. */ +export interface Traceparent { + /** 32-hex-char trace id (lowercase). */ + readonly traceId: string + /** 16-hex-char parent span id (lowercase). */ + readonly spanId: string + /** 2-hex-char trace-flags byte (e.g. "01"). */ + readonly flags: string + /** Whether the `sampled` flag (bit 0 of trace-flags) is set. */ + readonly sampled: boolean +} + +// version "-" trace-id "-" span-id "-" trace-flags, all lowercase hex. +const TRACEPARENT_RE = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/ + +// Pull `traceparent='<value>'` out of a SQLCommenter comment. Per the spec the +// value is URL-encoded and single-quoted; accept double quotes defensively. +const COMMENT_VALUE_RE = /traceparent\s*=\s*(['"])([^'"]+)\1/i + +/** An all-zero id is invalid per the W3C spec — treat it as absent. */ +const isAllZero = (hex: string): boolean => /^0+$/.test(hex) + +/** + * Extract the W3C `traceparent` from a SQLCommenter comment embedded anywhere in + * `sql`. Returns `null` when absent or malformed (all-zero ids counted as + * malformed). The parse is case- and whitespace-tolerant and URL-decodes the + * value defensively. 
+ */ +export function parseSqlCommenterTraceparent(sql: string | null | undefined): Traceparent | null { + if (!sql) return null + + const commentMatch = COMMENT_VALUE_RE.exec(sql) + if (!commentMatch) return null + + let raw = commentMatch[2] + try { + raw = decodeURIComponent(raw) + } catch { + // Leave `raw` as-is when it isn't valid percent-encoding. + } + + const parts = TRACEPARENT_RE.exec(raw.trim().toLowerCase()) + if (!parts) return null + + const [, , traceId, spanId, flags] = parts + if (isAllZero(traceId) || isAllZero(spanId)) return null + + return { + traceId, + spanId, + flags, + sampled: (Number.parseInt(flags, 16) & 0x01) === 1, + } +} diff --git a/packages/query-engine/src/ch/queries/errors.ts b/packages/query-engine/src/ch/queries/errors.ts index d542a6e2..7162f453 100644 --- a/packages/query-engine/src/ch/queries/errors.ts +++ b/packages/query-engine/src/ch/queries/errors.ts @@ -151,6 +151,13 @@ const TREE_SPAN_ATTR_KEYS = [ "cache.name", "cache.operation", "cache.lookup_performed", + // Generic OpenTelemetry database-client spans — the `db.system.name` signal + // (with the legacy `db.system` fallback) lets the trace views detect a DB + // span and render its summary badge without waiting for the per-span lazy + // detail fetch. The full `db.*` field set (namespace, operation, rows, + // server, …) is loaded lazily by `spanDetailQuery` for the detail panel. + "db.system.name", + "db.system", // Cloudflare Workers Observability — read by `getCloudflareInfo` to mark // Worker spans and render the edge-location + outcome badges in the tree // views. 
The full set (ray id, cpu/wall time, script version, geo city) is diff --git a/packages/ui/src/lib/__tests__/cloud-platforms.test.ts b/packages/ui/src/lib/__tests__/cloud-platforms.test.ts index 5586a752..aacc9c09 100644 --- a/packages/ui/src/lib/__tests__/cloud-platforms.test.ts +++ b/packages/ui/src/lib/__tests__/cloud-platforms.test.ts @@ -75,12 +75,11 @@ describe("getCloudPlatform — cloudflare", () => { expect(byLabel["Handler"]).toBe("queue") }) - it("returns null for a non-platform span", () => { + it("returns null for a non-platform span (no cloud, no db)", () => { expect( getCloudPlatform({ "http.method": "GET", "http.route": "/v1/spans", - "db.system": "postgresql", }), ).toBeNull() }) @@ -101,6 +100,48 @@ describe("getCloudPlatform — cloudflare", () => { }) }) +describe("getCloudPlatform — database", () => { + it("normalizes a DB-client span and humanizes db.system.name", () => { + const info = getCloudPlatform({ + "db.system.name": "postgresql", + "db.namespace": "app", + "db.collection.name": "users", + "db.operation.name": "SELECT", + "db.response.returned_rows": "42", + "server.address": "db.internal", + "server.port": "5432", + }) + expect(info?.id).toBe("database") + expect(info?.label).toBe("PostgreSQL") + expect(info?.outcome).toBeNull() + const byLabel = Object.fromEntries(info!.fields.map((f) => [f.label, f.value])) + expect(byLabel["Operation"]).toBe("SELECT") + expect(byLabel["Namespace"]).toBe("app") + expect(byLabel["Table"]).toBe("users") + expect(byLabel["Rows returned"]).toBe("42") + expect(byLabel["Server"]).toBe("db.internal:5432") + }) + + it("falls back to the legacy db.system and title-cases unknown systems", () => { + expect(getCloudPlatform({ "db.system": "clickhouse" })?.label).toBe("ClickHouse") + expect(getCloudPlatform({ "db.system.name": "microsoft.sql_server" })?.label).toBe("SQL Server") + expect(getCloudPlatform({ "db.system.name": "cockroachdb" })?.label).toBe("CockroachDB") + }) + + it("flags error.type as a bad outcome", 
() => { + const info = getCloudPlatform({ "db.system.name": "mysql", "error.type": "timeout" }) + expect(info?.outcome).toEqual({ value: "timeout", bad: true }) + }) + + // Same projected-map regression as cloudflare: an empty db.system value must + // not flag every span as a database call. + it("returns null when db.system keys are present but empty (projected map)", () => { + expect( + getCloudPlatform({ "db.system.name": "", "db.system": "", "http.route": "/checkout" }), + ).toBeNull() + }) +}) + describe("outcomeBadgeStyle", () => { it("styles ok vs failure outcomes differently", () => { expect(outcomeBadgeStyle(false)).toContain("severity-info") diff --git a/packages/ui/src/lib/cloud-platforms/database.ts b/packages/ui/src/lib/cloud-platforms/database.ts new file mode 100644 index 00000000..3231b6cd --- /dev/null +++ b/packages/ui/src/lib/cloud-platforms/database.ts @@ -0,0 +1,133 @@ +import { DatabaseIcon } from "../../components/icons" +import type { CloudPlatformAdapter, CloudPlatformField } from "./types" +import { pickAttr } from "./types" + +// Generic OpenTelemetry database-client span annotations. Any DB-instrumented +// service (PostgreSQL, MySQL, ClickHouse, Redis, MongoDB, …) emits the `db.*` +// semantic-convention attributes on its CLIENT span; this adapter normalizes +// them into the shared summary block so a query's shape, target, and result +// size read inline in the trace — for every database, with no per-DB code. +// +// Reuses the same span-annotation registry as the cloud-platform adapters +// (cloudflare.ts, …); the normalized `CloudPlatformInfo` shape is generic +// enough to describe a database call as well as a serverless invocation. + +/** Display names for the well-known `db.system.name` values (stable semconv). 
*/ +const DB_SYSTEM_LABELS: Record<string, string> = { + postgresql: "PostgreSQL", + mysql: "MySQL", + mariadb: "MariaDB", + clickhouse: "ClickHouse", + redis: "Redis", + mongodb: "MongoDB", + elasticsearch: "Elasticsearch", + opensearch: "OpenSearch", + cassandra: "Cassandra", + cockroachdb: "CockroachDB", + sqlite: "SQLite", + oracle: "Oracle", + db2: "Db2", + "microsoft.sql_server": "SQL Server", + mssql: "SQL Server", + "aws.dynamodb": "DynamoDB", + dynamodb: "DynamoDB", + "aws.redshift": "Redshift", + redshift: "Redshift", + "gcp.spanner": "Spanner", + "azure.cosmosdb": "Cosmos DB", + cosmosdb: "Cosmos DB", + memcached: "Memcached", + couchbase: "Couchbase", + couchdb: "CouchDB", + neo4j: "Neo4j", + snowflake: "Snowflake", + trino: "Trino", + presto: "Presto", + hive: "Hive", + spanner: "Spanner", +} + +/** Brand-ish accent per system; a small tint on the 12px icon, muted otherwise. */ +const DB_SYSTEM_ACCENTS: Record<string, string> = { + postgresql: "text-[#336791]", + mysql: "text-[#00758F]", + mariadb: "text-[#1F7A8C]", + clickhouse: "text-[#F5B400]", + redis: "text-[#DC382D]", + mongodb: "text-[#13AA52]", + elasticsearch: "text-[#00BFB3]", + opensearch: "text-[#00BFB3]", + cassandra: "text-[#1287B1]", + cockroachdb: "text-[#6933FF]", + sqlite: "text-[#0F80CC]", + oracle: "text-[#C74634]", + mssql: "text-[#CC2927]", + "microsoft.sql_server": "text-[#CC2927]", + dynamodb: "text-[#4053D6]", + "aws.dynamodb": "text-[#4053D6]", + redshift: "text-[#8C4FFF]", + snowflake: "text-[#29B5E8]", +} + +/** "microsoft.sql_server" → "SQL Server"; unknown → title-cased last segment. */ +function humanizeDbSystem(system: string): string { + const known = DB_SYSTEM_LABELS[system.toLowerCase()] + if (known) return known + const tail = system.split(".").pop() ?? 
system + return tail + .split(/[_\s-]+/) + .filter(Boolean) + .map((w) => w.charAt(0).toUpperCase() + w.slice(1)) + .join(" ") +} + +export const databaseAdapter: CloudPlatformAdapter = { + id: "database", + detect(attrs) { + // Require a real, non-empty `db.system.name` VALUE (stable semconv) — with + // the legacy `db.system` fallback. Never a key-presence check: the trimmed + // tree-view projection emits requested keys with empty-string values on + // every span, so presence alone would flag non-DB spans. + const system = pickAttr(attrs, "db.system.name", "db.system") + if (!system) return null + + const operation = pickAttr(attrs, "db.operation.name", "db.operation") + const namespace = pickAttr(attrs, "db.namespace") + const collection = pickAttr(attrs, "db.collection.name") + const rows = pickAttr(attrs, "db.response.returned_rows") + const batch = pickAttr(attrs, "db.operation.batch.size") + const statusCode = pickAttr(attrs, "db.response.status_code") + const serverAddress = pickAttr(attrs, "server.address", "network.peer.address") + const serverPort = pickAttr(attrs, "server.port", "network.peer.port") + const errorType = pickAttr(attrs, "error.type") + + const fields: CloudPlatformField[] = [] + if (operation) fields.push({ label: "Operation", value: operation }) + if (namespace) fields.push({ label: "Namespace", value: namespace }) + if (collection) fields.push({ label: "Table", value: collection }) + if (rows) fields.push({ label: "Rows returned", value: rows }) + // Per spec, `db.operation.batch.size` is only emitted for batches (never 1). + if (batch) fields.push({ label: "Batch size", value: batch }) + if (serverAddress) + fields.push({ + label: "Server", + value: serverPort ? `${serverAddress}:${serverPort}` : serverAddress, + copyable: true, + }) + if (statusCode) fields.push({ label: "Status", value: statusCode }) + + const outcome = errorType ? 
{ value: errorType, bad: true } : null + + return { + id: "database", + label: humanizeDbSystem(system), + kind: "Query", + Icon: DatabaseIcon, + accentClassName: DB_SYSTEM_ACCENTS[system.toLowerCase()] ?? "text-muted-foreground", + edge: null, + location: null, + outcome, + fields, + } + }, +} diff --git a/packages/ui/src/lib/cloud-platforms/index.ts b/packages/ui/src/lib/cloud-platforms/index.ts index 046c2dbd..332a23b7 100644 --- a/packages/ui/src/lib/cloud-platforms/index.ts +++ b/packages/ui/src/lib/cloud-platforms/index.ts @@ -1,5 +1,6 @@ import type { CloudPlatformInfo } from "./types" import { cloudflareAdapter } from "./cloudflare" +import { databaseAdapter } from "./database" export type { CloudPlatformAdapter, @@ -20,9 +21,11 @@ export { outcomeBadgeStyle } from "./types" // id → a copyable field, etc. // 2. import it here and add it to `ADAPTERS`. // Order matters only if two adapters could match the same span; keep the most -// specific first. +// specific first. The `databaseAdapter` (generic `db.*` semconv) is a broad +// last resort — a serverless span and a DB-client span are disjoint in practice, +// but keep provider adapters ahead of it regardless. const ADAPTERS: ReadonlyArray<{ detect: (a: Record) => CloudPlatformInfo | null }> = - [cloudflareAdapter] + [cloudflareAdapter, databaseAdapter] /** First adapter that recognizes these span attributes, normalized; else null. */ export function getCloudPlatform(attrs: Record): CloudPlatformInfo | null {