Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
2e2a416
fix(pgsql): support aggregate operator projections
zinic May 19, 2026
404d4b6
fix(cypher): prefer supported integer conversion
zinic May 20, 2026
3c77e32
docs(pgsql): capture optimizer pass plan
zinic May 20, 2026
25a7488
feat(pgsql): analyze optimizer regions
zinic May 20, 2026
e57cac3
test(pgsql): cover optimizer path safety
zinic May 20, 2026
bd45f89
feat(pgsql): add optimizer pipeline hook
zinic May 20, 2026
ea09b7e
feat(pgsql): attach optimizer predicates
zinic May 20, 2026
0763c60
test(integration): stabilize optimizer fixture coverage
zinic May 20, 2026
ea6cfe5
chore(pgsql): harden optimizer foundation
zinic May 20, 2026
ce87c0e
feat(pgsql): prune expansion path projections
zinic May 20, 2026
35ab840
feat(pgsql): materialize path edges late
zinic May 20, 2026
2972ed6
docs(pgsql): sequence optimizer review followups
zinic May 21, 2026
40fa3ef
fix(pgsql): preserve optional match pruning barrier
zinic May 21, 2026
6d9d542
test(integration): assert optimized path semantics
zinic May 21, 2026
5e994de
test(pgsql): guard relationship expression materialization
zinic May 21, 2026
d6fb01e
docs(pgsql): capture optimizer measurement gaps
zinic May 21, 2026
74099c0
test(pgsql): update optional match barrier shape
zinic May 21, 2026
e874334
feat(pgsql): lower bound fixed hops as expand-into
zinic May 21, 2026
f891fc4
feat(pgsql): reorder independent node anchors
zinic May 21, 2026
f696b3b
feat(pgsql): push fixed suffix checks into expansions
zinic May 21, 2026
9fd9ee0
docs(pgsql): sequence optimizer gap closure plan
zinic May 21, 2026
7007774
feat(pgsql): close optimizer suffix pushdown gaps
zinic May 21, 2026
de68acc
test(pgsql): complete optimizer gap closure
zinic May 21, 2026
7364218
test(pgsql): measure optimizer rules locally
zinic May 21, 2026
475d28d
feat(pgsql): add optimizer lowering metadata contract
zinic May 21, 2026
f55b969
refactor(pgsql): lift projection pruning decisions into optimizer
zinic May 21, 2026
cf4768a
refactor(pgsql): lift late path materialization decisions
zinic May 21, 2026
2378915
refactor(pgsql): lift expansion suffix pushdown detection
zinic May 21, 2026
d9218b8
feat(pgsql): report fixed-hop expand-into decisions
zinic May 21, 2026
9cc5887
feat(pgsql): wire predicate attachments into lowering metadata
zinic May 21, 2026
959fb07
feat(pgsql): prefer optimizer lowering decisions in translator
zinic May 21, 2026
17fdd8b
test(pgsql): lock lowering metadata verification
zinic May 21, 2026
77f74df
fix(pgsql): harden optimizer lowering metadata
zinic May 21, 2026
a2bab00
docs(pgsql): document lowering metadata contract
zinic May 22, 2026
901b178
feat(pgsql): consume expand-into lowering decisions
zinic May 22, 2026
7789157
refactor(pgsql): lift projection pruning binding actions
zinic May 22, 2026
4f9ecfa
feat(pgsql): apply late materialization decisions explicitly
zinic May 22, 2026
9eded59
feat(pgsql): record consumed predicate placements
zinic May 22, 2026
793b967
feat(pgsql): carry suffix pushdown source spans
zinic May 22, 2026
888478a
refactor(pgsql): remove targeted lowering fallbacks
zinic May 22, 2026
441e7ce
feat(pgsql): plan expand-into for anonymous continuations
zinic May 22, 2026
e58de1e
refactor(pgsql): lift rewrite decisions into optimizer plan
zinic May 22, 2026
abd93fd
feat(pgsql): plan projection pruning for pattern predicates
zinic May 22, 2026
7f87f86
feat(pgsql): plan pattern predicate placement
zinic May 22, 2026
eecf505
refactor(pgsql): centralize selectivity and locality planning
zinic May 22, 2026
14392f0
fix(pgsql): address optimizer review feedback
zinic May 22, 2026
ebfcbe5
fix(pgsql): stage path nodes in tail predicates
zinic May 22, 2026
9c5c4c6
test(integration): validate ADCS optimizer fanout rewrite
zinic May 22, 2026
5bcd7a0
feat(plancorpus): add Cypher plan corpus capture tooling
zinic May 22, 2026
1f97efa
feat(plancorpus): add count fast path and skipped lowering reporting
zinic May 22, 2026
efd22b8
feat(pgsql): stage repeated path projection components
zinic May 22, 2026
ffceecc
feat(pgsql): plan traversal flips for endpoint predicates
zinic May 22, 2026
8ff5420
feat(pgsql): extend suffix pushdown to constrained bound endpoints
zinic May 22, 2026
9807c3f
test(integration): stabilize corpus validation
zinic May 23, 2026
9d0b3cb
feat(pgsql): record predicate placement consumption
zinic May 23, 2026
346686b
fix(pgsql): constrain predicate placement planning to clause
zinic May 23, 2026
e580826
fix(pgsql): preserve endpoints in edge count fast path
zinic May 23, 2026
5dd087e
feat(plancorpus): count partially skipped lowerings
zinic May 23, 2026
54d8b2b
fix(plancorpus): honor Neo4j connection URIs
zinic May 23, 2026
202b214
feat(pgsql): wire count star fast path planning
zinic May 23, 2026
ee9194a
test(pgsql): validate optimization gap fixes
zinic May 23, 2026
249c463
fix(pgsql): use typed text lookups for string equality
zinic May 23, 2026
9a5f2e5
test(pgsql): cover typed string equality translation
zinic May 23, 2026
c11dcce
test(pgsql): add string equality plan coverage
zinic May 23, 2026
7cca401
feat(pgsql): add edge kind count index
zinic May 23, 2026
65f79e4
test(pgsql): cover count fast path SQL shapes
zinic May 23, 2026
9e53934
docs(pgsql): document optimizer index assumptions
zinic May 23, 2026
ee648bf
test(pgsql): align optimizer safety string expectations
zinic May 23, 2026
1dd0cc3
docs(pgsql): record optimizer validation status
zinic May 23, 2026
17fcd0a
test(integration): add optimizer cases
zinic May 23, 2026
5aa4e7a
perf(pgsql): optimize typed pattern predicates
zinic May 23, 2026
181596d
feat(pgsql): lower membership-only collects to ids
zinic May 23, 2026
835a402
perf(pgsql): flip bound expansions to constrained terminals
zinic May 23, 2026
1a34875
feat(pgsql): plan terminal filters for kind-only shortest paths
zinic May 23, 2026
4c2bbf6
refactor(pgsql): defer blanket suffix indexing
zinic May 23, 2026
c31f1de
test(pgsql): update translation snapshots for optimizer lowerings
zinic May 23, 2026
3c446ac
test(integration): correct bounded Azure path assertion
zinic May 23, 2026
d8b4835
feat(pgsql): continue lowering and live query optimization
zinic May 23, 2026
8980a41
test(integration): add live aggregate traversal plan guard
zinic May 23, 2026
33af724
feat(pgsql): report skipped kind-only traversal flips
zinic May 23, 2026
59d4e9f
fix(pgsql): widen aggregate traversal count matching
zinic May 23, 2026
20ace15
fix(pgsql): respect selective bound traversal sources
zinic May 23, 2026
668a975
test(integration): expand aggregate traversal baseline coverage
zinic May 23, 2026
a3027ea
fix(pgsql): widen aggregate traversal final projections
zinic May 23, 2026
7b552ca
feat(pgsql): carry selectivity through traversal lowerings
zinic May 23, 2026
0a0e1c6
perf(pgsql): fold terminal filters into aggregate traversal
zinic May 23, 2026
6ca4b19
docs(pgsql): document aggregate optimizer continuation status
zinic May 23, 2026
5172fbf
feat(graphbench): add scale corpus contract
zinic May 24, 2026
2f7654e
feat(graphbench): add PostgreSQL SQL runner
zinic May 24, 2026
f6912e2
feat(graphbench): add Neo4j runner
zinic May 24, 2026
2c0a4ce
feat(graphbench): add local traversal placeholder
zinic May 24, 2026
d6efb92
feat(graphbench): add comparison reports
zinic May 24, 2026
cf6d967
docs(graphbench): document AGE reference workflow
zinic May 24, 2026
90c82fa
fix(build): repair optimizer build
zinic May 24, 2026
a230029
fix(benchmark): harden resource handling
zinic May 24, 2026
73a1e00
fix(pgsql): address optimizer and translation edge cases
zinic May 24, 2026
a238385
fix(neo4j): harden database parsing and plan assertions
zinic May 24, 2026
d36e765
docs(integration): document backend-selected skips
zinic May 24, 2026
1f063b4
style: group related local declarations
zinic May 24, 2026
e8ae48a
fix(pgsql): handle optimizer locality selectivity edge cases
zinic May 24, 2026
ea6e05d
fix(pgsql): harden lowering plan carryover
zinic May 24, 2026
3ad9437
fix(graphbench): correct p95 ranking
zinic May 24, 2026
6a22865
fix(plancorpus): close records on write errors
zinic May 24, 2026
ec81c14
test(integration): seed abuse delegation fixture kind
zinic May 24, 2026
f5b892d
test(integration): assert connected cross-forest trust paths
zinic May 24, 2026
228f20f
test(integration): guard fixture-backed kind seeding
zinic May 24, 2026
9659bd0
test(pgsql): cover aggregate traversal predicate parameters
zinic May 25, 2026
50645cf
refactor(pgsql): add aggregate predicate translator helper
zinic May 25, 2026
91f6e20
fix(pgsql): share aggregate predicate parameter namespace
zinic May 25, 2026
c086747
refactor(pgsql): merge aggregate predicate parameters once
zinic May 25, 2026
78692bd
test(pgsql): retain aggregate traversal dependency guard
zinic May 25, 2026
ccd4e78
test: validate aggregate traversal predicate fix
zinic May 25, 2026
010b481
test: smoke downstream DAWGS consumers
zinic May 25, 2026
531911a
fix: address coderabbit review findings
zinic May 25, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ QUALITY_INPUTS += -mutation-report $(MUTATION_REPORT)
endif
QUALITY_INPUTS += -benchmark-regression $(BENCHMARK_REGRESSION)

.PHONY: default all build deps tidy lint format test test_all test_integration test_neo4j test_pg test_update complexity complexity_check crap crap_check quality quality_check quality_backend quality_bench metrics metrics_check generate clean help
.PHONY: default all build deps tidy lint format test test_all test_integration test_neo4j test_pg test_update plan_corpus complexity complexity_check crap crap_check quality quality_check quality_backend quality_bench metrics metrics_check generate clean help

# Default target
default: help
Expand Down Expand Up @@ -109,6 +109,10 @@ test_update:
@cp -fv cypher/models/pgsql/test/updated_cases/* cypher/models/pgsql/test/translation_cases
@rm -rf cypher/models/pgsql/test/updated_cases

# Capture the Cypher plan corpus by running the ./cmd/plancorpus program.
# Lists $(METRICS_DIR) as a prerequisite so that directory is created before the run.
plan_corpus: $(METRICS_DIR)
@echo "Capturing Cypher plan corpus..."
@$(GO_CMD) run ./cmd/plancorpus

# Metric targets
$(METRICS_DIR):
@mkdir -p $(METRICS_DIR)
Expand Down Expand Up @@ -218,6 +222,7 @@ help:
@echo " test_bench - Run benchmark test"
@echo " test_neo4j - Run Neo4j integration tests"
@echo " test_pg - Run PostgreSQL integration tests"
@echo " plan_corpus - Capture shared corpus query plans for configured backends"
@echo " test_update - Update test cases"
@echo " complexity - Report cyclomatic complexity"
@echo " crap - Report CRAP scores from unit test coverage"
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ export CONNECTION_STRING="postgresql://dawgs:weneedbetterpasswords@localhost:654
export CONNECTION_STRING="neo4j://neo4j:weneedbetterpasswords@localhost:7687"
```

Neo4j connection strings may use `neo4j://`, `neo4j+s://`, or `neo4j+ssc://`; a single path segment selects the Neo4j database name.

Use `make test` for unit tests only and `make test_integration` for integration tests only.

### Test Metrics
Expand Down Expand Up @@ -95,6 +97,24 @@ make quality FUZZ_REPORT=.coverage/fuzz.json MUTATION_REPORT=.coverage/mutation.
`PG_CONNECTION_STRING` and `NEO4J_CONNECTION_STRING`. `make quality_bench` writes benchmark markdown and JSON captures
for later baseline comparison.

`make plan_corpus` captures plan diagnostics for the shared Cypher integration corpus. It accepts either
`CONNECTION_STRING` for one backend or `PG_CONNECTION_STRING` and `NEO4J_CONNECTION_STRING` for both backends, then
writes JSONL captures and markdown/JSON summaries under `.coverage/`.

`go run ./cmd/graphbench` captures runtime diagnostics for the scale corpus under `benchmark/testdata/scale`. The
current modes are `postgres_sql`, `local_traversal`, and `neo4j`; AGE is reference-design input only and is not a direct
comparison mode yet. The command can emit JSONL records plus Markdown and JSON summaries, and can compare current timings
against a previous JSONL baseline.

PostgreSQL translates exact string property equality with a JSON string type guard and `properties ->>` extraction, so
indexes created on expressions such as `properties ->> 'objectid'` and `properties ->> 'name'` can be used for selective
anchors without matching JSON booleans or numbers. Simple relationship count fast paths depend on the schema's
`kind_id`-first edge index for efficient typed counts.

Substring and suffix predicates are intentionally not promoted to blanket schema indexes. PostgreSQL deployments can
request explicit `TextSearchIndex`/trigram property indexes for fields that need `CONTAINS`, `STARTS WITH`, or
`ENDS WITH`, but default schema assertion should wait until all suffix forms share one semantics-preserving lowering.

Thresholds are report-only by default. To enforce the configured thresholds, run:

```bash
Expand Down
28 changes: 28 additions & 0 deletions benchmark/testdata/scale/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# GraphBench Scale Corpus

This corpus measures graph workload shapes, not general Cypher correctness.
The shared integration corpus remains the source of backend-equivalent semantic
coverage.

Cases declare the values a query observes so benchmark reports can separate
ID-only work from node, relationship, property, and path materialization.
Current execution modes are `postgres_sql`, `local_traversal`, and `neo4j`.
Apache AGE is intentionally not a benchmark mode here; it may appear only in
`reference_design` notes as input for DAWGS design choices.

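Each JSON file under `cases/` is an object whose `cases` array lists scale cases with:

- `source`: the source corpus or workload family.
- `dataset`: the fixture dataset to load from `integration/testdata`.
- `name` and `category`: stable identifiers used in reports.
- `cypher`: the Cypher query under test.
- `params`: named parameter values passed to the query as literals.
- `node_params`: named parameters given as fixture node references (for
  example `n1`), used where the query compares against `id(...)`.
- `expected`: the expected result, as `row_count` (result cardinality) and
  `result_kind` (for example `scalar`, `id_set`, or `path_set`).
- `observes`: whether the query observes paths, nodes, relationships,
  properties, or only IDs internally.
- `shape`: workload-shape hints such as `root_predicate`,
  `terminal_predicate`, `edge_kinds`, `min_depth`, and
  `path_materialization_required`.
- `candidate_modes`: the execution modes that should attempt the case.
- `tags`: labels describing the case.
- `reference_design`: optional design notes, including AGE observations when
  useful.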

Use `cmd/graphbench` to run this corpus and produce JSONL, Markdown, and JSON
summaries.
70 changes: 70 additions & 0 deletions benchmark/testdata/scale/cases/counts.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"cases": [
{
"name": "all_node_count",
"dataset": "base",
"category": "counts",
"cypher": "MATCH (n) RETURN count(n)",
"expected": {
"row_count": 1,
"result_kind": "scalar"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "neo4j"],
"tags": ["count", "count-store"]
},
{
"name": "typed_node_count",
"dataset": "base",
"category": "counts",
"cypher": "MATCH (n:NodeKind1) RETURN count(n)",
"expected": {
"row_count": 1,
"result_kind": "scalar"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"terminal_predicate": "node_kind",
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "neo4j"],
"tags": ["count", "typed-count", "graph-stats"]
},
{
"name": "typed_edge_count",
"dataset": "base",
"category": "counts",
"cypher": "MATCH ()-[r:EdgeKind1]->() RETURN count(r)",
"expected": {
"row_count": 1,
"result_kind": "scalar"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"edge_kinds": ["EdgeKind1"],
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "neo4j"],
"tags": ["count", "typed-count", "graph-stats"]
}
]
}

54 changes: 54 additions & 0 deletions benchmark/testdata/scale/cases/lookups.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"cases": [
{
"name": "objectid_exact_string_anchor",
"dataset": "base",
"category": "lookups",
"cypher": "MATCH (n:NodeKind1) WHERE n.objectid = $objectid RETURN id(n)",
"params": {
"objectid": "S-1-5-21-1"
},
"expected": {
"row_count": 1,
"result_kind": "id_set"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"root_predicate": "selective_property",
"terminal_predicate": "node_kind",
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "neo4j"],
"tags": ["property-anchor", "expression-index"]
},
{
"name": "boolean_property_filter",
"dataset": "base",
"category": "lookups",
"cypher": "MATCH (n:NodeKind1) WHERE n.enabled = true RETURN id(n)",
"expected": {
"row_count": 1,
"result_kind": "id_set"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"root_predicate": "boolean_property",
"terminal_predicate": "node_kind",
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "neo4j"],
"tags": ["property-filter"]
}
]
}

61 changes: 61 additions & 0 deletions benchmark/testdata/scale/cases/shortest_paths.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"cases": [
{
"name": "shortest_distance_bound_pair",
"dataset": "base",
"category": "shortest_path",
"cypher": "MATCH p = shortestPath((s)-[*1..]->(e)) WHERE id(s) = $start_id AND id(e) = $end_id RETURN length(p)",
"node_params": {
"start_id": "n1",
"end_id": "n3"
},
"expected": {
"row_count": 1,
"result_kind": "scalar"
},
"observes": {
"paths": false,
"nodes": false,
"relationships": false,
"properties": false
},
"shape": {
"root_predicate": "bound_id",
"terminal_predicate": "bound_id",
"min_depth": 1,
"path_materialization_required": false
},
"candidate_modes": ["postgres_sql", "local_traversal", "neo4j"],
"tags": ["shortest-distance", "local-traversal-candidate"]
},
{
"name": "one_shortest_path_bound_pair",
"dataset": "base",
"category": "shortest_path",
"cypher": "MATCH p = shortestPath((s)-[*1..]->(e)) WHERE id(s) = $start_id AND id(e) = $end_id RETURN p LIMIT 1",
"node_params": {
"start_id": "n1",
"end_id": "n3"
},
"expected": {
"row_count": 1,
"result_kind": "path_set"
},
"observes": {
"paths": true,
"nodes": true,
"relationships": true,
"properties": true
},
"shape": {
"root_predicate": "bound_id",
"terminal_predicate": "bound_id",
"min_depth": 1,
"path_materialization_required": true
},
"candidate_modes": ["postgres_sql", "local_traversal", "neo4j"],
"tags": ["one-shortest-path", "local-traversal-candidate"]
}
]
}

Loading
Loading