From de069f7ff8a5c15bdaf6553a422d7a871062d231 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:01:19 +0530
Subject: [PATCH 01/33] feat(aggregation): implement stateless stage-based
 orchestration

- Reorganized aggregation logic into a cohesive 'aggregation' package.
- Implemented AggregationOrchestrator supporting stage-based parallel execution and wildcards.
- Implemented validator utility (CLI & programmatic) validating against schema.json.
- Added new stateless /aggregation/initiate and /poll FastAPI endpoints.
- Retained legacy /run and /status wrappers for backward compatibility.
- Updated spanner-ingestion-workflow.yaml to use the state-passing loop.
- Added comprehensive unit and integration test suites (25 tests total, 100% passing).
---
 .../ingestion-helper/aggregation.yaml         |  26 ++
 .../ingestion-helper/aggregation/__init__.py  |   6 +-
 .../aggregation/orchestrator.py               | 191 ++++++++++++
 .../aggregation/orchestrator_test.py          | 177 +++++++++++
 .../ingestion-helper/aggregation/schema.json  |  93 ++++++
 .../ingestion-helper/aggregation/validator.py | 132 ++++++++
 .../aggregation/validator_test.py             | 289 ++++++++++++++++++
 .../workflow/ingestion-helper/app_test.py     |  72 +++++
 .../workflow/ingestion-helper/pyproject.toml  |   2 +
 .../ingestion-helper/routes/aggregation.py    | 215 ++++++++++---
 .../ingestion-helper/utils/aggregation.py     |  98 ------
 .../utils/aggregation_test.py                 |  87 ------
 pipeline/workflow/ingestion-helper/uv.lock    | 164 ++++++++++
 .../workflow/spanner-ingestion-workflow.yaml  |  48 +--
 14 files changed, 1356 insertions(+), 244 deletions(-)
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/schema.json
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/validator.py
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/validator_test.py
 delete mode 100644 pipeline/workflow/ingestion-helper/utils/aggregation.py
 delete mode 100644 pipeline/workflow/ingestion-helper/utils/aggregation_test.py

diff --git a/pipeline/workflow/ingestion-helper/aggregation.yaml b/pipeline/workflow/ingestion-helper/aggregation.yaml
new file mode 100644
index 000000000..b6eb01acb
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation.yaml
@@ -0,0 +1,26 @@
+# =============================================================================
+# Data Commons Aggregation Configuration
+# =============================================================================
+# By default, this runs the standard global post-processing steps in parallel.
+# You can customize this file to add place-based rollups, statistical variable
+# aggregations, or to disable specific steps.
+
+aggregations:
+
+  # ---------------------------------------------------------------------------
+  # Stage 1: Standard Global Steps (Run in parallel by default)
+  # ---------------------------------------------------------------------------
+  # Generates linkedContainedInPlace, linkedMemberOf, etc.
+  - type: linked_edges
+    imports: ["*"]
+    stage: 1
+
+  # Generates summary statistics in the Cache table
+  - type: provenance_summary
+    imports: ["*"]
+    stage: 1
+
+  # Generates the Statistical Variable hierarchy/verticals
+  - type: stat_var_groups
+    imports: ["*"]
+    stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/__init__.py b/pipeline/workflow/ingestion-helper/aggregation/__init__.py
index 0b76748ab..5fedbd914 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/__init__.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/__init__.py
@@ -23,6 +23,8 @@
 from .stat_var_aggregator import StatVarAggregator
 from .place_aggregation_generator import PlaceAggregationGenerator
 from .stat_var_group_generator import StatVarGroupGenerator
+from .orchestrator import AggregationOrchestrator
+from .validator import validate_config
 
 __all__ = [
     'BigQueryExecutor',
@@ -30,5 +32,7 @@
     'ProvenanceSummaryGenerator',
     'StatVarAggregator',
     'PlaceAggregationGenerator',
-    'StatVarGroupGenerator'
+    'StatVarGroupGenerator',
+    'AggregationOrchestrator',
+    'validate_config'
 ]
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
new file mode 100644
index 000000000..db823a8b4
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -0,0 +1,191 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+from .bq_executor import BigQueryExecutor
+from .linked_edge_generator import LinkedEdgeGenerator
+from .provenance_summary_generator import ProvenanceSummaryGenerator
+from .stat_var_aggregator import StatVarAggregator
+from .place_aggregation_generator import PlaceAggregationGenerator
+from .stat_var_group_generator import StatVarGroupGenerator
+from .validator import validate_config
+
+logging.getLogger().setLevel(logging.INFO)
+
+
+class AggregationOrchestrator:
+    """Orchestrates the overall aggregation workflow."""
+
+    def __init__(self,
+                 connection_id: str,
+                 project_id: str,
+                 instance_id: str,
+                 database_id: str,
+                 location: Optional[str] = None,
+                 is_base_dc: bool = True,
+                 config_file_path: Optional[str] = None) -> None:
+        """Initializes the orchestrator and loads/validates the configuration.
+
+        Args:
+            connection_id: BigQuery connection ID to Spanner.
+            project_id: GCP Project ID.
+            instance_id: Spanner Instance ID.
+            database_id: Spanner Database ID.
+            location: BigQuery location.
+            is_base_dc: Whether this is running in the base Data Commons environment.
+            config_file_path: Optional custom path to the aggregation.yaml file.
+                If None or empty, defaults to aggregation.yaml in the parent of this package directory.
+        """
+        # The executor is always created with run_sequential=False so the jobs of one stage can run
+        # in parallel; sequencing between stages is left to the caller (workflow/router level).
+        self.executor = BigQueryExecutor(connection_id=connection_id,
+                                         project_id=project_id,
+                                         instance_id=instance_id,
+                                         database_id=database_id,
+                                         location=location,
+                                         run_sequential=False)
+
+        # One generator per aggregation type, all sharing the same executor
+        self.place_generator = PlaceAggregationGenerator(self.executor, is_base_dc)
+        self.stat_var_aggregator = StatVarAggregator(self.executor, is_base_dc)
+        self.linked_edge_generator = LinkedEdgeGenerator(self.executor, is_base_dc)
+        self.provenance_summary_generator = ProvenanceSummaryGenerator(self.executor, is_base_dc)
+        self.stat_var_group_generator = StatVarGroupGenerator(self.executor, is_base_dc)
+
+        # Resolve the default config path and the schema path relative to this module's directory
+        curr_dir = os.path.dirname(os.path.abspath(__file__))
+        if not config_file_path:
+            config_file_path = os.path.join(curr_dir, "..", "aggregation.yaml")
+        schema_file_path = os.path.join(curr_dir, "schema.json")
+
+        # Load the configuration and validate it against schema.json
+        self.aggregations = validate_config(config_file_path, schema_file_path)
+
+
+
+    def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
+        """Submits every enabled aggregation configured for `stage_num` and returns the job IDs.
+
+        Args:
+            stage_num: The stage number to execute.
+            active_imports: The list of active import names in this run.
+
+        Returns:
+            A list of BigQuery job IDs submitted for this stage (empty when no step applied).
+        """
+        logging.info(f"Executing Aggregation Stage {stage_num} for active imports: {active_imports}")
+        jobs = []
+
+        for config in self.aggregations:
+            # 1. Skip if disabled
+            if config.get("disabled", False):
+                continue
+
+            # 2. Filter by stage
+            if config.get("stage", 1) != stage_num:
+                continue
+
+            # 3. Filter by active imports
+            applicable_imports = self._get_applicable_imports(config, active_imports)
+            if not applicable_imports:
+                logging.info(f"Skipping step '{config['type']}' in Stage {stage_num}: no matching active imports.")
+                continue
+
+            # 4. Route to correct generator
+            step_type = config["type"]
+            logging.info(f"Submitting step '{step_type}' in Stage {stage_num} for imports: {applicable_imports}")
+            step_jobs = []
+            if step_type == "place":
+                # A falsy result (no job submitted) is filtered out when IDs are collected below.
+                step_jobs.append(self.place_generator.aggregate_places(
+                    import_names=applicable_imports,
+                    source_type=config["source_type"],
+                    destination_type=config["destination_type"],
+                    allow_multiple_to_places=config.get("allow_multiple_to_places", False)
+                ))
+
+            elif step_type == "stat_var":
+                step_jobs.extend(self.stat_var_aggregator.aggregate_stat_vars(
+                    ancestor_sv=config["ancestor_sv_id"],
+                    source_svs=config["source_sv_ids"],
+                    import_names=applicable_imports,
+                    output_import_name=config.get("output_import_name"),
+                    skip_all_sources_present_check=config.get("skip_all_sources_present_check", False)
+                ))
+
+            elif step_type == "linked_edges":
+                step_jobs.extend(self.linked_edge_generator.run_all(applicable_imports))
+
+            elif step_type == "provenance_summary":
+                step_jobs.extend(self.provenance_summary_generator.run_all(applicable_imports))
+
+            elif step_type == "stat_var_groups":
+                step_jobs.extend(self.stat_var_group_generator.run_all(applicable_imports))
+
+            else:
+                # e.g. "entity": accepted by schema.json but no generator is wired in here yet.
+                logging.warning(f"No generator for step type '{step_type}' in Stage {stage_num}; step skipped.")
+
+            # Collect BQ jobs
+            jobs.extend(job.job_id for job in step_jobs if job and job.job_id)
+
+        logging.info(f"Submitted {len(jobs)} jobs in Stage {stage_num}: {jobs}")
+        return jobs
+
+    def has_stage(self, stage_num: int, active_imports: List[str]) -> bool:
+        """Reports whether the given stage contains at least one aggregation that would run.
+
+        Args:
+            stage_num: The stage number to check.
+            active_imports: The list of active import names.
+
+        Returns:
+            True if some enabled aggregation in the stage matches an active import, False otherwise.
+        """
+        def runs_in_stage(entry: Dict[str, Any]) -> bool:
+            # Disabled entries never run, whatever their stage.
+            if entry.get("disabled", False):
+                return False
+            # Entries without an explicit stage belong to stage 1.
+            if entry.get("stage", 1) != stage_num:
+                return False
+            # An entry only counts if at least one active import matches it.
+            return bool(self._get_applicable_imports(entry, active_imports))
+
+        return any(runs_in_stage(entry) for entry in self.aggregations)
+
+    def check_jobs_status(self, job_ids: List[str]) -> Dict[str, Any]:
+        """Checks the status of the specified BigQuery job IDs.
+
+        Delegates to the executor's get_jobs_status; any failure is logged and re-raised unchanged.
+        """
+        try:
+            return self.executor.get_jobs_status(job_ids)
+        except Exception as e:
+            logging.error(f"Failed to check jobs status: {e}")
+            raise
+
+    def _get_applicable_imports(self, config: Dict[str, Any], active_imports: List[str]) -> List[str]:
+        """Returns the active imports matched by this config, ordered as in `active_imports`."""
+        configured_imports = set(config["imports"])
+
+        # A "*" entry selects every active import; return a copy so the caller's list is never aliased.
+        if "*" in configured_imports:
+            return list(active_imports)
+
+        # Filter (de-duplicating) instead of set-intersecting: set order is arbitrary, so job order was unstable.
+        return [name for name in dict.fromkeys(active_imports) if name in configured_imports]
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
new file mode 100644
index 000000000..4301944c5
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -0,0 +1,177 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import sys
+import unittest
+from unittest.mock import MagicMock, patch, mock_open
+
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from aggregation import AggregationOrchestrator
+
+
+# Sample valid YAML config for testing
+VALID_CONFIG_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: ["*"]
+    stage: 1
+
+  - type: place
+    source_type: County
+    destination_type: State
+    allow_multiple_to_places: false
+    imports: ["USFed_Census"]
+    stage: 1
+
+  - type: place
+    source_type: State
+    destination_type: Country
+    imports: ["*"]
+    stage: 2
+    disabled: true
+
+  - type: stat_var
+    ancestor_sv_id: Count_Person
+    source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
+    skip_all_sources_present_check: true
+    imports: ["USFed_Census"]
+    stage: 2
+"""
+
+
+
+
+@patch('aggregation.orchestrator.BigQueryExecutor')
+@patch('aggregation.orchestrator.PlaceAggregationGenerator')
+@patch('aggregation.orchestrator.StatVarAggregator')
+@patch('aggregation.orchestrator.LinkedEdgeGenerator')
+@patch('aggregation.orchestrator.ProvenanceSummaryGenerator')
+@patch('aggregation.orchestrator.StatVarGroupGenerator')
+class TestAggregationOrchestrator(unittest.TestCase):
+
+    def setUp(self):
+        # Read the real schema.json that sits next to this test so validation runs against it.
+        test_dir = os.path.dirname(__file__)
+        with open(os.path.join(test_dir, "schema.json"), "r") as schema_file:
+            self.schema_json = json.load(schema_file)
+
+    def _get_mock_open(self, yaml_content):
+        """Builds an open() side effect that serves the schema JSON and the given config YAML."""
+        def fake_open(path, *args, **kwargs):
+            if "schema.json" in path:
+                return mock_open(read_data=json.dumps(self.schema_json))().__enter__()
+            if "aggregation.yaml" in path:
+                return mock_open(read_data=yaml_content)().__enter__()
+            raise FileNotFoundError(f"Mock open not configured for: {path}")
+        return fake_open
+
+
+
+    @patch('builtins.open')
+    def test_has_stage(self, mock_file_open, *mocks):
+        """Checks has_stage for stages that are active, disabled/filtered out, or absent."""
+        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
+        orchestrator = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+
+        cases = [
+            # Stage 1 has linked_edges (wildcard) plus a place rollup for USFed_Census.
+            (1, ["AnyImport"], True),
+            (1, ["USFed_Census"], True),
+            # Stage 2: the place rollup is disabled and stat_var only matches USFed_Census.
+            (2, ["OtherImport"], False),
+            (2, ["USFed_Census"], True),
+            # Stage 3 is not present in the config at all.
+            (3, ["USFed_Census"], False),
+        ]
+        for stage_num, active_imports, expected in cases:
+            self.assertEqual(bool(orchestrator.has_stage(stage_num, active_imports)), expected)
+
+    @patch('builtins.open')
+    def test_execute_stage_1(self, mock_file_open,
+                             mock_svg_gen, mock_prov_gen, mock_edge_gen,
+                             mock_sv_agg, mock_place_gen, mock_executor):
+        """Runs Stage 1 and checks that both matching steps submit jobs, including the wildcard one."""
+        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
+
+        # linked_edges hands back a list of jobs, the place rollup a single job.
+        edge_job = MagicMock()
+        edge_job.job_id = "job-edge-1"
+        mock_edge_gen.return_value.run_all.return_value = [edge_job]
+
+        place_job = MagicMock()
+        place_job.job_id = "job-place-1"
+        mock_place_gen.return_value.aggregate_places.return_value = place_job
+
+        orchestrator = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+
+        # Only "USFed_Census" is active in this run.
+        submitted = orchestrator.execute_stage(stage_num=1, active_imports=["USFed_Census"])
+
+        # Exactly the two stubbed jobs must come back, in any order.
+        self.assertEqual(len(submitted), 2)
+        self.assertIn("job-edge-1", submitted)
+        self.assertIn("job-place-1", submitted)
+
+        # The '*' wildcard on linked_edges resolves to every active import.
+        mock_edge_gen.return_value.run_all.assert_called_once_with(["USFed_Census"])
+
+        # The place rollup is configured for "USFed_Census" and receives its YAML settings.
+        mock_place_gen.return_value.aggregate_places.assert_called_once_with(
+            import_names=["USFed_Census"],
+            source_type="County",
+            destination_type="State",
+            allow_multiple_to_places=False
+        )
+
+    @patch('builtins.open')
+    def test_execute_stage_2_with_disabled_and_filtering(self, mock_file_open,
+                                                         mock_svg_gen, mock_prov_gen, mock_edge_gen,
+                                                         mock_sv_agg, mock_place_gen, mock_executor):
+        """Runs Stage 2 and checks that disabled steps never run and imports are filtered per step."""
+        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
+
+        # The stat_var aggregator is the only Stage 2 step that can submit a job.
+        sv_job = MagicMock()
+        sv_job.job_id = "job-sv-1"
+        mock_sv_agg.return_value.aggregate_stat_vars.return_value = [sv_job]
+
+        orchestrator = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+
+        # 1. "OtherImport" does not match the stat_var step, and the Stage 2 place rollup is
+        # disabled, so nothing at all may be submitted.
+        submitted = orchestrator.execute_stage(stage_num=2, active_imports=["OtherImport"])
+        self.assertEqual(len(submitted), 0)
+        mock_place_gen.return_value.aggregate_places.assert_not_called()
+        mock_sv_agg.return_value.aggregate_stat_vars.assert_not_called()
+
+        # 2. "USFed_Census" matches the stat_var step, which must now run;
+        # the place rollup stays skipped because it is disabled.
+        submitted = orchestrator.execute_stage(stage_num=2, active_imports=["USFed_Census"])
+
+        self.assertEqual(submitted, ["job-sv-1"])
+        mock_place_gen.return_value.aggregate_places.assert_not_called()  # disabled: true
+        mock_sv_agg.return_value.aggregate_stat_vars.assert_called_once_with(
+            ancestor_sv="Count_Person",
+            source_svs=["Count_Person_Male", "Count_Person_Female"],
+            import_names=["USFed_Census"],
+            output_import_name=None,
+            skip_all_sources_present_check=True
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
new file mode 100644
index 000000000..b0deba8f6
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -0,0 +1,93 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "AggregationConfig",
+  "type": "object",
+  "properties": {
+    "aggregations": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": ["type", "imports"],
+        "properties": {
+          "type": {
+            "type": "string",
+            "enum": ["place", "stat_var", "entity", "linked_edges", "provenance_summary", "stat_var_groups"]
+          },
+          "disabled": {
+            "type": "boolean",
+            "default": false
+          },
+          "stage": {
+            "type": "integer",
+            "minimum": 1,
+            "default": 1
+          },
+          "imports": {
+            "type": "array",
+            "items": { "type": "string" },
+            "minItems": 1
+          }
+        },
+        "dependencies": {
+          "type": {
+            "oneOf": [
+              {
+                "properties": {
+                  "type": { "const": "place" },
+                  "source_type": { "type": "string" },
+                  "destination_type": { "type": "string" },
+                  "allow_multiple_to_places": { "type": "boolean" }
+                },
+                "required": ["source_type", "destination_type"]
+              },
+              {
+                "properties": {
+                  "type": { "const": "stat_var" },
+                  "ancestor_sv_id": { "type": "string" },
+                  "source_sv_ids": {
+                    "type": "array",
+                    "items": { "type": "string" },
+                    "minItems": 1
+                  },
+                  "skip_all_sources_present_check": { "type": "boolean" },
+                  "output_import_name": { "type": "string" }
+                },
+                "required": ["ancestor_sv_id", "source_sv_ids"]
+              },
+              {
+                "properties": {
+                  "type": { "const": "entity" },
+                  "entity_types": {
+                    "type": "array",
+                    "items": { "type": "string" }
+                  },
+                  "location_props": {
+                    "type": "array",
+                    "items": { "type": "string" }
+                  },
+                  "date_prop": { "type": "string" },
+                  "agg_date_formats": {
+                    "type": "array",
+                    "items": { "type": "string" }
+                  },
+                  "constraints": {
+                    "type": "array",
+                    "items": { "type": "string" }
+                  }
+                },
+                "required": ["entity_types", "location_props"]
+              },
+              {
+                "properties": {
+                  "type": { "enum": ["linked_edges", "provenance_summary", "stat_var_groups"] }
+                }
+              }
+            ]
+          }
+        }
+      }
+    }
+  },
+  "required": ["aggregations"],
+  "additionalProperties": false
+}
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator.py b/pipeline/workflow/ingestion-helper/aggregation/validator.py
new file mode 100644
index 000000000..e1ac6d2e1
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator.py
@@ -0,0 +1,132 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Configuration validator and CLI tool for Data Commons aggregations."""
+
+import argparse
+import json
+import logging
+import os
+import sys
+from typing import Any, Dict, List
+import yaml
+import jsonschema
+
+logging.basicConfig(level=logging.INFO)
+
+# ANSI escape codes for colored terminal output
+GREEN = "\033[92m"
+RED = "\033[91m"
+RESET = "\033[0m"
+
+
+def validate_config(config_file_path: str, schema_file_path: str) -> List[Dict[str, Any]]:
+    """Loads and validates the aggregation YAML configuration against the JSON Schema.
+
+    Args:
+        config_file_path: Path to the aggregation.yaml configuration file.
+        schema_file_path: Path to the schema.json validation file.
+
+    Returns:
+        The validated aggregation dicts, or [] (no schema check) if the document has no 'aggregations' key.
+
+    Raises:
+        FileNotFoundError: If either the config or schema file is missing.
+        jsonschema.exceptions.ValidationError: If schema validation fails.
+        yaml.YAMLError: If the YAML file is malformed.
+    """
+    if not os.path.exists(config_file_path):
+        raise FileNotFoundError(f"Aggregation config file not found: {config_file_path}")
+    if not os.path.exists(schema_file_path):
+        raise FileNotFoundError(f"JSON Schema file not found: {schema_file_path}")
+
+    # 1. Load and parse YAML
+    try:
+        with open(config_file_path, "r") as f:
+            config = yaml.safe_load(f)
+    except yaml.YAMLError as e:
+        logging.error(f"Failed to parse YAML file {config_file_path}: {e}")
+        raise
+    # Non-mapping documents (None, scalars, lists) cannot hold the key; guard before using `in`.
+    if not isinstance(config, dict) or "aggregations" not in config:
+        logging.warning("Aggregation config is empty or missing 'aggregations' key.")
+        return []
+
+    # 2. Load JSON Schema
+    try:
+        with open(schema_file_path, "r") as f:
+            schema = json.load(f)
+    except Exception as e:
+        logging.error(f"Failed to load JSON Schema file {schema_file_path}: {e}")
+        raise
+
+    # 3. Validate against Schema
+    try:
+        jsonschema.validate(instance=config, schema=schema)
+    except jsonschema.exceptions.ValidationError as e:
+        logging.error(f"Schema validation failed for config {config_file_path}: {e.message}")
+        raise
+
+    return config["aggregations"]
+
+
+def main():
+    """CLI entry point: validates a config against the schema and exits 0 on success, 1 on any error."""
+    parser = argparse.ArgumentParser(description="Validate Data Commons aggregation configuration files against the JSON Schema.")
+    
+    # Resolve default paths relative to this script's directory (aggregation/)
+    curr_dir = os.path.dirname(os.path.abspath(__file__))
+    default_config = os.path.join(curr_dir, "..", "aggregation.yaml")
+    default_schema = os.path.join(curr_dir, "schema.json")
+
+    parser.add_argument(
+        "--config",
+        type=str,
+        default=default_config,
+        help=f"Path to the aggregation YAML config file (default: {default_config})"
+    )
+    parser.add_argument(
+        "--schema",
+        type=str,
+        default=default_schema,
+        help=f"Path to the JSON Schema validation file (default: {default_schema})"
+    )
+
+    args = parser.parse_args()
+
+    print(f"Validating '{os.path.basename(args.config)}' against '{os.path.basename(args.schema)}'...")
+
+    try:
+        aggregations = validate_config(args.config, args.schema)
+        print(f"{GREEN}[SUCCESS] Configuration is valid!{RESET}")
+        print(f"Parsed {len(aggregations)} aggregation steps successfully.")
+        sys.exit(0)
+    except FileNotFoundError as e:
+        print(f"{RED}[ERROR] File not found: {e}{RESET}", file=sys.stderr)
+        sys.exit(1)
+    except jsonschema.exceptions.ValidationError as e:
+        print(f"{RED}[ERROR] Schema Validation Failed:{RESET}", file=sys.stderr)
+        print(f"{RED}  - Path: {'.'.join(str(p) for p in e.path)}{RESET}", file=sys.stderr)
+        print(f"{RED}  - Message: {e.message}{RESET}", file=sys.stderr)
+        sys.exit(1)
+    except yaml.YAMLError as e:
+        print(f"{RED}[ERROR] YAML Syntax Error: {e}{RESET}", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"{RED}[ERROR] Unexpected validation failure: {e}{RESET}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
new file mode 100644
index 000000000..fd38a35cc
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -0,0 +1,289 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Unit tests for the aggregation configuration validator."""
+
+import json
+import os
+import sys
+import unittest
+from unittest.mock import mock_open, patch
+import jsonschema
+import yaml
+
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from aggregation import validate_config
+
+# =============================================================================
+# Mock YAML Configurations for Testing
+# =============================================================================
+
+# 1. A perfectly valid config containing all possible types and fields
+VALID_ALL_TYPES_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: ["*"]
+    stage: 1
+    disabled: false
+
+  - type: place
+    source_type: County
+    destination_type: State
+    allow_multiple_to_places: true
+    imports: ["ImportA", "ImportB"]
+    stage: 2
+
+  - type: stat_var
+    ancestor_sv_id: Count_Person
+    source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
+    skip_all_sources_present_check: true
+    output_import_name: "Aggregated_Pop"
+    imports: ["ImportC"]
+    stage: 3
+
+  - type: entity
+    entity_types: ["MortalityEvent"]
+    location_props: ["location"]
+    date_prop: "date"
+    agg_date_formats: ["%Y"]
+    imports: ["ImportD"]
+
+  - type: provenance_summary
+    imports: ["*"]
+
+  - type: stat_var_groups
+    imports: ["*"]
+"""
+
+# 2. Invalid: Missing required type field
+INVALID_MISSING_TYPE_YAML = """
+aggregations:
+  - imports: ["*"]
+"""
+
+# 3. Invalid: Missing required imports field
+INVALID_MISSING_IMPORTS_YAML = """
+aggregations:
+  - type: linked_edges
+"""
+
+# 4. Invalid: imports is a string instead of an array
+INVALID_IMPORTS_TYPE_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: "*"
+"""
+
+# 5. Invalid: stage is a string instead of an integer
+INVALID_STAGE_TYPE_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: ["*"]
+    stage: "first"
+"""
+
+# 6. Invalid: stage is 0 (minimum is 1)
+INVALID_STAGE_VALUE_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: ["*"]
+    stage: 0
+"""
+
+# 7. Invalid: empty imports list (minItems: 1)
+INVALID_EMPTY_IMPORTS_YAML = """
+aggregations:
+  - type: linked_edges
+    imports: []
+"""
+
+# 8. Invalid place rollup: missing required source_type
+INVALID_PLACE_MISSING_FIELD_YAML = """
+aggregations:
+  - type: place
+    destination_type: State
+    imports: ["*"]
+"""
+
+# 9. Invalid stat var: missing required source_sv_ids
+INVALID_STAT_VAR_MISSING_FIELD_YAML = """
+aggregations:
+  - type: stat_var
+    ancestor_sv_id: Count_Person
+    imports: ["*"]
+"""
+
+# 10. Invalid stat var: empty source_sv_ids list
+INVALID_STAT_VAR_EMPTY_SVS_YAML = """
+aggregations:
+  - type: stat_var
+    ancestor_sv_id: Count_Person
+    source_sv_ids: []
+    imports: ["*"]
+"""
+
+# 11. Invalid entity: missing required location_props
+INVALID_ENTITY_MISSING_FIELD_YAML = """
+aggregations:
+  - type: entity
+    entity_types: ["Event"]
+    imports: ["*"]
+"""
+
+# 12. Malformed YAML (Indentation error)
+MALFORMED_YAML = """
+aggregations:
+  - type: linked_edges
+  imports:
+  - "*"
+"""
+
+
+class TestConfigValidator(unittest.TestCase):
+
+    def setUp(self) -> None:
+        # Read the real schema.json next to this test; setUp runs outside the per-test builtins.open patch
+        self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+        with open(self.schema_path, "r") as f:
+            self.schema_json = json.load(f)
+
+    def _get_mock_open(self, yaml_content: str):
+        """Return an open() side effect: paths containing 'schema.json' read the real schema, all others read yaml_content."""
+        def side_effect(path, *args, **kwargs):
+            if "schema.json" in path:
+                return mock_open(read_data=json.dumps(self.schema_json))().__enter__()
+            else:
+                return mock_open(read_data=yaml_content)().__enter__()
+        return side_effect
+
+    # =============================================================================
+    # Success Test Cases
+    # =============================================================================
+
+    @patch('builtins.open')
+    def test_validate_config_success_all_types(self, mock_file_open) -> None:
+        """Verifies that a comprehensive, valid config with all types passes validation."""
+        mock_file_open.side_effect = self._get_mock_open(VALID_ALL_TYPES_YAML)
+
+        aggregations = validate_config("aggregation.yaml", self.schema_path)
+        
+        self.assertEqual(len(aggregations), 6)
+        self.assertEqual(aggregations[0]["type"], "linked_edges")
+        self.assertEqual(aggregations[1]["source_type"], "County")
+        self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
+        self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
+
+    # =============================================================================
+    # Schema Constraint Test Cases
+    # =============================================================================
+
+    @patch('builtins.open')
+    def test_validate_config_missing_type(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_MISSING_TYPE_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("'type' is a required property", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_missing_imports(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_MISSING_IMPORTS_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("'imports' is a required property", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_invalid_imports_type(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_IMPORTS_TYPE_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("is not of type 'array'", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_invalid_stage_type(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_STAGE_TYPE_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("is not of type 'integer'", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_invalid_stage_value(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_STAGE_VALUE_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("is less than the minimum of 1", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_empty_imports_list(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_EMPTY_IMPORTS_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("should be non-empty", ctx.exception.message)
+
+    # =============================================================================
+    # Conditional Dependency Test Cases (OneOf/Dependencies)
+    # =============================================================================
+
+    @patch('builtins.open')
+    def test_validate_config_place_missing_field(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_PLACE_MISSING_FIELD_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        # Validation fails because place rollup requires source_type
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_stat_var_missing_field(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_STAT_VAR_MISSING_FIELD_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_stat_var_empty_source_svs(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_STAT_VAR_EMPTY_SVS_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("should be non-empty", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_entity_missing_field(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(INVALID_ENTITY_MISSING_FIELD_YAML)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+    # =============================================================================
+    # File System & Syntax Error Test Cases
+    # =============================================================================
+
+    @patch('builtins.open')
+    def test_validate_config_yaml_syntax_error(self, mock_file_open) -> None:
+        mock_file_open.side_effect = self._get_mock_open(MALFORMED_YAML)
+        with self.assertRaises(yaml.YAMLError):
+            validate_config("aggregation.yaml", self.schema_path)
+
+    def test_validate_config_missing_config_file(self) -> None:
+        with self.assertRaises(FileNotFoundError):
+            validate_config("non_existent_config.yaml", "schema.json")
+
+    def test_validate_config_missing_schema_file(self) -> None:
+        # Only the config path "exists" here; assumes validate_config checks files via os.path.exists -- TODO confirm
+        with patch('os.path.exists', side_effect=lambda path: "aggregation.yaml" in path):
+            with self.assertRaises(FileNotFoundError):
+                validate_config("aggregation.yaml", "non_existent_schema.json")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index 8150e5a6b..d19fdfb7e 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -263,6 +263,78 @@ def test_update_import_version_override_success(self, mock_get_caller_identity):
         # Verify get_caller_identity was called exactly once outside of the loop
         mock_get_caller_identity.assert_called_once()
 
+    @patch('routes.aggregation._get_orchestrator')
+    def test_aggregation_initiate_success(self, mock_aggregation_utils):
+        # Fake orchestrator: only stage 1 is active and it submits two jobs
+        fake_orchestrator = MagicMock()
+        fake_orchestrator.has_stage.side_effect = lambda stage, imports: stage == 1
+        fake_orchestrator.execute_stage.return_value = ["job-1", "job-2"]
+        # The route obtains its orchestrator from the patched factory
+        mock_aggregation_utils.return_value = fake_orchestrator
+
+        # Exercise the endpoint with a single import
+        import_list = [{"importName": "USFed_Census"}]
+        response = client.post("/aggregation/initiate", json={"importList": import_list})
+
+        # The returned state describes stage 1 and echoes the request's imports
+        self.assertEqual(response.status_code, 200)
+
+        body = response.json()
+        self.assertEqual(body["status"], "RUNNING")
+        self.assertEqual(body["current_stage"], 1)
+        self.assertEqual(body["active_job_ids"], ["job-1", "job-2"])
+        self.assertEqual(body["import_list"], import_list)
+
+    @patch('routes.aggregation._get_orchestrator')
+    def test_aggregation_poll_transition(self, mock_aggregation_utils):
+        # Scenario: stage 1 has finished, so one poll must launch stage 2
+        fake_orchestrator = MagicMock()
+        # BigQuery reports every stage 1 job as DONE
+        fake_orchestrator.check_jobs_status.return_value = {"status": "DONE"}
+        # Stage 2 is the only remaining active stage and submits one job
+        fake_orchestrator.has_stage.side_effect = lambda stage, imports: stage == 2
+        fake_orchestrator.execute_stage.return_value = ["job-stage2-1"]
+        # The route obtains its orchestrator from the patched factory
+        mock_aggregation_utils.return_value = fake_orchestrator
+
+        # State handed back by the caller while stage 1 was in flight
+        running_state = {
+            "status": "RUNNING",
+            "current_stage": 1,
+            "active_job_ids": ["job-1", "job-2"],
+            "import_list": [{"importName": "USFed_Census"}],
+        }
+
+        # Poll once with that state
+        response = client.post("/aggregation/poll", json=running_state)
+
+        # The run keeps going, now on stage 2 with the newly submitted job
+        self.assertEqual(response.status_code, 200)
+        body = response.json()
+        self.assertEqual(body["status"], "RUNNING")
+        self.assertEqual(body["current_stage"], 2)
+        self.assertEqual(body["active_job_ids"], ["job-stage2-1"])
+
+    @patch('routes.aggregation._get_orchestrator')
+    def test_aggregation_legacy_run(self, mock_aggregation_utils):
+        # Fake orchestrator: stages 1 and 2 are active, one job each
+        fake_orchestrator = MagicMock()
+        fake_orchestrator.has_stage.side_effect = lambda stage, imports: stage in (1, 2)
+        fake_orchestrator.execute_stage.side_effect = lambda stage, imports: [f"job-stage{stage}-1"]
+        # The route obtains its orchestrator from the patched factory
+        mock_aggregation_utils.return_value = fake_orchestrator
+
+        # Call the legacy endpoint with a single import
+        request_body = {"importList": [{"importName": "USFed_Census"}]}
+        response = client.post("/aggregation/run", json=request_body)
+
+        # Jobs from both stages come back together, in stage order
+        self.assertEqual(response.status_code, 200)
+        body = response.json()
+        self.assertEqual(body["status"], "SUBMITTED")
+        self.assertEqual(body["jobIds"], ["job-stage1-1", "job-stage2-1"])
+
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pipeline/workflow/ingestion-helper/pyproject.toml b/pipeline/workflow/ingestion-helper/pyproject.toml
index 6301d7b2e..4ce4a251a 100644
--- a/pipeline/workflow/ingestion-helper/pyproject.toml
+++ b/pipeline/workflow/ingestion-helper/pyproject.toml
@@ -33,6 +33,8 @@ dependencies = [
     "google-cloud-bigquery",
     "redis",
     "jinja2",
+    "pyyaml>=6.0.3",
+    "jsonschema>=4.26.0",
 ]
 
 [tool.hatch.version]
diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index 7433f906d..ce7434763 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -13,44 +13,61 @@
 # limitations under the License.
 
 import logging
-from fastapi import APIRouter, HTTPException
-from utils.aggregation import AggregationUtils
-import config
 from typing import Any, Dict, List, Optional
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field
+
+import config
 from routes.models import BaseResponse, ResponseStatus
+from aggregation import AggregationOrchestrator
 from utils.logging import log_start
 
-class AggregationRequest(BaseModel):
+# =============================================================================
+# Pydantic Models for the New Stateless API
+# =============================================================================
+
+class StateObject(BaseModel):
+    status: str = Field(..., description="Overall status of the run: RUNNING, SUCCEEDED, FAILED")
+    current_stage: int = Field(..., description="The stage currently executing")
+    active_job_ids: List[str] = Field(default_factory=list, description="BQ job IDs running in the current stage")
+    import_list: List[Dict[str, Any]] = Field(default_factory=list, description="Original list of imports")
+    error: Optional[str] = Field(default=None, description="Detailed error message if failed")
+
+class InitiateRequest(BaseModel):
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
-class AggregationStatusRequest(BaseModel):
+# =============================================================================
+# Pydantic Models for the Legacy API (Backward Compatibility)
+# =============================================================================
+
+class LegacyAggregationRequest(BaseModel):
+    importList: List[Dict[str, Any]] = Field(default_factory=list)
+
+class LegacyAggregationStatusRequest(BaseModel):
     jobIds: List[str] = Field(default_factory=list)
 
-class AggregationResponse(BaseResponse):
+class LegacyAggregationResponse(BaseResponse):
     jobIds: List[str] = Field(default_factory=list, description="BigQuery job IDs submitted for async aggregation")
 
-class AggregationStatusResponse(BaseResponse):
+class LegacyAggregationStatusResponse(BaseResponse):
     error: Optional[str] = Field(default=None, description="Detailed error message if failed")
     failedJobs: Optional[List[str]] = Field(default_factory=list, description="List of failed BigQuery job IDs")
 
+# =============================================================================
+# Router Definition
+# =============================================================================
+
 router = APIRouter(prefix="/aggregation", tags=["aggregation"])
 
-@router.post("/run", response_model=AggregationResponse)
-@log_start
-def run_aggregation(req: AggregationRequest):
-    """Runs aggregation logic asynchronously for the specified imports, returning BigQuery job IDs."""
-    if not req.importList:
-        logging.info("Empty import list. Skipping aggregation.")
-        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
-        
+
+def _get_orchestrator() -> AggregationOrchestrator:
+    """Helper to initialize the orchestrator using global config."""
     if not all([config.SPANNER_CONNECTION_ID, config.SPANNER_PROJECT_ID, config.SPANNER_INSTANCE_ID, config.SPANNER_GRAPH_DATABASE_ID]):
         raise HTTPException(
             status_code=400,
             detail="Missing required configuration environment variables: SPANNER_CONNECTION_ID, SPANNER_PROJECT_ID, SPANNER_INSTANCE_ID, or SPANNER_GRAPH_DATABASE_ID"
         )
-        
-    aggregation = AggregationUtils(
+    return AggregationOrchestrator(
         connection_id=config.SPANNER_CONNECTION_ID,
         project_id=config.SPANNER_PROJECT_ID,
         instance_id=config.SPANNER_INSTANCE_ID,
@@ -58,39 +75,155 @@ def run_aggregation(req: AggregationRequest):
         location=config.LOCATION,
         is_base_dc=config.IS_BASE_DC,
     )
+
+# -----------------------------------------------------------------------------
+# New Stateless API Endpoints (Stage-based)
+# -----------------------------------------------------------------------------
+
+@router.post("/initiate", response_model=StateObject)
+@log_start
+def initiate_aggregation(req: InitiateRequest):
+    """Starts a run: executes the first active stage (1-10) and returns the initial state. HTTP 400 on missing config, 500 on failure."""
+    if not req.importList:
+        logging.info("Empty import list. Skipping aggregation.")
+        return StateObject(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=[])
+
+    # Resolved outside the try so its HTTP 400 (missing config) is not re-wrapped as a 500 below
+    orchestrator = _get_orchestrator()
     try:
-        job_ids = aggregation.run_aggregation(req.importList)
-        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
+        import_names = [item.get('importName') for item in req.importList if item.get('importName')]
+        # Find the first stage that has active aggregations (usually Stage 1)
+        first_stage = 1
+        while first_stage <= 10: # Arbitrary upper limit for safety
+            if orchestrator.has_stage(first_stage, import_names):
+                break
+            first_stage += 1
+        else:
+            logging.info("No stages have active aggregations for the current imports. Completing immediately.")
+            return StateObject(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=req.importList)
+
+        logging.info(f"Initiating aggregation at Stage {first_stage}")
+        job_ids = orchestrator.execute_stage(first_stage, import_names)
+
+        return StateObject(
+            status="RUNNING",
+            current_stage=first_stage,
+            active_job_ids=job_ids,
+            import_list=req.importList
+        )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Aggregation failed: {str(e)}")
+        logging.error(f"Failed to initiate aggregation: {e}")
+        raise HTTPException(status_code=500, detail=f"Failed to initiate aggregation: {str(e)}")
 
-@router.post("/status", response_model=AggregationStatusResponse)
-def check_aggregation_status(req: AggregationStatusRequest):
-    """Checks the status of the submitted aggregation BigQuery jobs."""
-    if not req.jobIds:
-        logging.info("Empty jobIds. Returning status DONE.")
-        return AggregationStatusResponse(status=ResponseStatus.DONE)
+
+@router.post("/poll", response_model=StateObject)
+@log_start
+def poll_aggregation(state: StateObject):
+    """Advances a RUNNING state: reports FAILED/RUNNING from the active jobs, else launches the next active stage or returns SUCCEEDED."""
+    if state.status != "RUNNING":
+        return state # Any non-RUNNING state is echoed back untouched
+
+    try:
+        orchestrator = _get_orchestrator()
+        import_names = [item.get('importName') for item in state.import_list if item.get('importName')]
+
+        # 1. Ask the orchestrator for the combined status of the current stage's jobs
+        logging.info(f"Polling status for jobs in Stage {state.current_stage}: {state.active_job_ids}")
+        bq_status = orchestrator.check_jobs_status(state.active_job_ids)
         
-    if not all([config.SPANNER_CONNECTION_ID, config.SPANNER_PROJECT_ID, config.SPANNER_INSTANCE_ID, config.SPANNER_GRAPH_DATABASE_ID]):
-        raise HTTPException(
-            status_code=400,
-            detail="Missing required configuration environment variables."
+        # Case A: the status is FAILED -> stop the run and surface the reported error
+        if bq_status["status"] == "FAILED":
+            logging.error(f"Stage {state.current_stage} failed with error: {bq_status.get('error')}")
+            return StateObject(
+                status="FAILED",
+                current_stage=state.current_stage,
+                active_job_ids=[],
+                import_list=state.import_list,
+                error=bq_status.get("error")
+            )
+            
+        # Case B: the status is RUNNING -> nothing to do yet
+        if bq_status["status"] == "RUNNING":
+            logging.info(f"Stage {state.current_stage} is still executing.")
+            return state # Return unchanged
+            
+        # Case C: any other status counts as success (NOTE(review): confirm check_jobs_status only yields FAILED/RUNNING/DONE) -> run the next active stage
+        next_stage = state.current_stage + 1
+        while next_stage <= 10: # Arbitrary upper limit; same bound as /initiate
+            if orchestrator.has_stage(next_stage, import_names):
+                logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
+                new_job_ids = orchestrator.execute_stage(next_stage, import_names)
+                return StateObject(
+                    status="RUNNING",
+                    current_stage=next_stage,
+                    active_job_ids=new_job_ids,
+                    import_list=state.import_list
+                )
+            next_stage += 1
+            
+        # The loop found no later stage with work for these imports, so the run is complete
+        logging.info("All aggregation stages completed successfully!")
+        return StateObject(
+            status="SUCCEEDED",
+            current_stage=state.current_stage,
+            active_job_ids=[],
+            import_list=state.import_list
         )
+            
+    except Exception as e:  # also catches the HTTP 400 from _get_orchestrator; reported as a FAILED state
+        logging.error(f"Error during polling: {e}")
+        return StateObject(
+            status="FAILED",
+            current_stage=state.current_stage,
+            active_job_ids=[],
+            import_list=state.import_list,
+            error=f"Orchestrator error: {str(e)}"
+        )
+
+# -----------------------------------------------------------------------------
+# Legacy API Endpoints (Backward Compatibility Mode)
+# -----------------------------------------------------------------------------
+
+@router.post("/run", response_model=LegacyAggregationResponse)
+@log_start
+def run_aggregation_legacy(req: LegacyAggregationRequest):
+    """Legacy endpoint. Runs ALL enabled aggregations in parallel (ignores stages). HTTP 400 on missing config, 500 on failure."""
+    if not req.importList:
+        logging.info("Empty import list. Skipping legacy aggregation.")
+        return LegacyAggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
+
+    # Resolved outside the try so its HTTP 400 (missing config) is not re-wrapped as a 500 below
+    orchestrator = _get_orchestrator()
+    try:
+        import_names = [item.get('importName') for item in req.importList if item.get('importName')]
+        # Compatibility Mode: submit stages 1-10 at once, the same bound /initiate and /poll scan (range end is exclusive)
+        job_ids = []
+        for stage_num in range(1, 11):
+            if orchestrator.has_stage(stage_num, import_names):
+                job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
+
+        return LegacyAggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
+    except Exception as e:
+        logging.error(f"Legacy aggregation failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Legacy aggregation failed: {str(e)}")
+
+
+@router.post("/status", response_model=LegacyAggregationStatusResponse)
+@log_start
+def check_aggregation_status_legacy(req: LegacyAggregationStatusRequest):
+    """Legacy endpoint. Checks the status of the submitted BigQuery jobs. HTTP 400 on missing config, 500 on failure."""
+    if not req.jobIds:
+        logging.info("Empty jobIds. Returning status DONE.")
+        return LegacyAggregationStatusResponse(status=ResponseStatus.DONE)
         
-    aggregation = AggregationUtils(
-        connection_id=config.SPANNER_CONNECTION_ID,
-        project_id=config.SPANNER_PROJECT_ID,
-        instance_id=config.SPANNER_INSTANCE_ID,
-        database_id=config.SPANNER_GRAPH_DATABASE_ID,
-        location=config.LOCATION,
-        is_base_dc=config.IS_BASE_DC,
-    )
+    orchestrator = _get_orchestrator()  # outside the try: its HTTP 400 must not be re-wrapped as a 500
     try:
-        status_info = aggregation.check_aggregation_status(req.jobIds)
-        return AggregationStatusResponse(
+        status_info = orchestrator.check_jobs_status(req.jobIds)
+        return LegacyAggregationStatusResponse(
             status=ResponseStatus.from_str(status_info.get("status", "ERROR")),
             error=status_info.get("error"),
             failedJobs=status_info.get("failedJobs", [])
         )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Aggregation status check failed: {str(e)}")
+        logging.error(f"Legacy status check failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Legacy status check failed: {str(e)}")
diff --git a/pipeline/workflow/ingestion-helper/utils/aggregation.py b/pipeline/workflow/ingestion-helper/utils/aggregation.py
deleted file mode 100644
index 432a4000f..000000000
--- a/pipeline/workflow/ingestion-helper/utils/aggregation.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-from typing import Any, Dict, List, Optional
-
-from aggregation import BigQueryExecutor
-from aggregation import LinkedEdgeGenerator
-from aggregation import ProvenanceSummaryGenerator
-from aggregation import StatVarAggregator
-from aggregation import StatVarGroupGenerator
-from google.cloud import bigquery
-
-logging.getLogger().setLevel(logging.INFO)
-
-
-class AggregationUtils:
-    """Orchestrates the overall aggregation workflow."""
-
-    def __init__(self,
-                 connection_id: str,
-                 project_id: str,
-                 instance_id: str,
-                 database_id: str,
-                 location: Optional[str] = None,
-                 is_base_dc: bool = True) -> None:
-        # TODO: remove sequential execution once DCP changes are made
-        # Use sequential execution for DCP (backward compatibility)
-        run_sequential = not is_base_dc
-        self.executor = BigQueryExecutor(connection_id=connection_id,
-                                         project_id=project_id,
-                                         instance_id=instance_id,
-                                         database_id=database_id,
-                                         location=location,
-                                         run_sequential=run_sequential)
-        self.linked_edge_generator = LinkedEdgeGenerator(
-            self.executor, is_base_dc)
-        self.provenance_summary_generator = ProvenanceSummaryGenerator(
-            self.executor, is_base_dc)
-
-    def run_aggregation(self, import_list: List[Dict[str, Any]]) -> List[str]:
-        """
-        Orchestrates standard per-import aggregations and global aggregations.
-        Returns a list of BigQuery job IDs for async polling.
-        """
-        logging.info(f"Received request for importList: {import_list}")
-
-        try:
-            import_names = []
-            # 1. Run standard per-import aggregations
-            for import_item in import_list:
-                import_name = import_item.get('importName')
-                if import_name:
-                    import_names.append(import_name)
-                    query = "SELECT @import_name as import_name, CURRENT_TIMESTAMP() as execution_time"
-                    job_config = bigquery.QueryJobConfig(query_parameters=[
-                        bigquery.ScalarQueryParameter("import_name", "STRING",
-                                                      import_name),
-                    ])
-                    self.executor.execute(query, job_config=job_config)
-                else:
-                    logging.info(
-                        'Skipping aggregation logic for empty importName')
-
-            # 2. Run global aggregations asynchronously
-            jobs = []
-            jobs.extend(self.linked_edge_generator.run_all(import_names))
-            jobs.extend(self.provenance_summary_generator.run_all(import_names))
-
-            job_ids = [job.job_id for job in jobs if job]
-            logging.info(f"Submitted async aggregation jobs: {job_ids}")
-
-            return job_ids
-        except Exception as e:
-            logging.error(f"Aggregation failed: {e}")
-            raise e
-
-    def check_aggregation_status(self, job_ids: List[str]) -> Dict[str, Any]:
-        """
-        Checks the status of the provided BigQuery job IDs.
-        """
-        logging.info(f"Checking status for jobs: {job_ids}")
-        try:
-            return self.executor.get_jobs_status(job_ids)
-        except Exception as e:
-            logging.error(f"Failed to check aggregation status: {e}")
-            raise e
diff --git a/pipeline/workflow/ingestion-helper/utils/aggregation_test.py b/pipeline/workflow/ingestion-helper/utils/aggregation_test.py
deleted file mode 100644
index f4afb6e3f..000000000
--- a/pipeline/workflow/ingestion-helper/utils/aggregation_test.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import unittest
-from unittest.mock import MagicMock
-from unittest.mock import patch
-
-sys.path.append(os.path.dirname(os.path.dirname(__file__)))
-
-from utils.aggregation import AggregationUtils
-
-
-@patch('utils.aggregation.BigQueryExecutor')
-@patch('utils.aggregation.LinkedEdgeGenerator')
-@patch('utils.aggregation.ProvenanceSummaryGenerator')
-@patch('utils.aggregation.StatVarGroupGenerator')
-class TestAggregationUtils(unittest.TestCase):
-
-    def test_run_aggregation(self, mock_prov_gen, mock_edge_gen, mock_executor):
-        # Setup mocks
-        mock_executor_instance = MagicMock()
-        mock_executor.return_value = mock_executor_instance
-
-        mock_edge_gen_instance = MagicMock()
-        mock_edge_gen.return_value = mock_edge_gen_instance
-        mock_job1 = MagicMock()
-        mock_job1.job_id = "job1"
-        mock_edge_gen_instance.run_all.return_value = [mock_job1]
-
-        mock_prov_gen_instance = MagicMock()
-        mock_prov_gen.return_value = mock_prov_gen_instance
-        mock_job2 = MagicMock()
-        mock_job2.job_id = "job2"
-        mock_prov_gen_instance.run_all.return_value = [mock_job2]
-
-        utils = AggregationUtils(connection_id="conn",
-                                 project_id="proj",
-                                 instance_id="inst",
-                                 database_id="db",
-                                 is_base_dc=True)
-
-        import_list = [{'importName': 'import1'}, {'importName': 'import2'}]
-        job_ids = utils.run_aggregation(import_list)
-
-        # Verify standard import queries were executed
-        self.assertEqual(mock_executor_instance.execute.call_count, 2)
-
-        # Verify generators were called
-        mock_edge_gen_instance.run_all.assert_called_once_with(
-            ["import1", "import2"])
-        mock_prov_gen_instance.run_all.assert_called_once_with(
-            ["import1", "import2"])
-
-        self.assertEqual(job_ids, ["job1", "job2"])
-
-    def test_check_aggregation_status(self, mock_prov_gen, mock_edge_gen,
-                                      mock_executor):
-        mock_executor_instance = MagicMock()
-        mock_executor.return_value = mock_executor_instance
-        mock_executor_instance.get_jobs_status.return_value = {"status": "DONE"}
-
-        utils = AggregationUtils(connection_id="conn",
-                                 project_id="proj",
-                                 instance_id="inst",
-                                 database_id="db")
-
-        status = utils.check_aggregation_status(["job1", "job2"])
-        mock_executor_instance.get_jobs_status.assert_called_once_with(
-            ["job1", "job2"])
-        self.assertEqual(status, {"status": "DONE"})
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/pipeline/workflow/ingestion-helper/uv.lock b/pipeline/workflow/ingestion-helper/uv.lock
index 6fa14bcb8..79e17f455 100644
--- a/pipeline/workflow/ingestion-helper/uv.lock
+++ b/pipeline/workflow/ingestion-helper/uv.lock
@@ -47,6 +47,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" },
 ]
 
+[[package]]
+name = "attrs"
+version = "26.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9a/8e/82a0fe20a541c03148528be8cac2408564a6c9a0cc7e9171802bc1d26985/attrs-26.1.0.tar.gz", hash = "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32", size = 952055, upload-time = "2026-03-19T14:22:25.026Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
+]
+
 [[package]]
 name = "certifi"
 version = "2026.5.20"
@@ -272,7 +281,9 @@ dependencies = [
     { name = "google-cloud-spanner" },
     { name = "google-cloud-storage" },
     { name = "jinja2" },
+    { name = "jsonschema" },
     { name = "pydantic" },
+    { name = "pyyaml" },
     { name = "redis" },
     { name = "uvicorn", extra = ["standard"] },
 ]
@@ -293,7 +304,9 @@ requires-dist = [
     { name = "google-cloud-spanner" },
     { name = "google-cloud-storage" },
     { name = "jinja2" },
+    { name = "jsonschema", specifier = ">=4.26.0" },
     { name = "pydantic", specifier = ">=2.0.0" },
+    { name = "pyyaml", specifier = ">=6.0.3" },
     { name = "redis" },
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.28.0" },
 ]
@@ -722,6 +735,33 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
 ]
 
+[[package]]
+name = "jsonschema"
+version = "4.26.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs" },
+    { name = "jsonschema-specifications" },
+    { name = "referencing" },
+    { name = "rpds-py" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
+]
+
+[[package]]
+name = "jsonschema-specifications"
+version = "2025.9.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "referencing" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
+]
+
 [[package]]
 name = "markupsafe"
 version = "3.0.3"
@@ -1196,6 +1236,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/27/e3/b519734372d305bd547534a9f32e4ce9f98552af753dce72cf3483a0ff0b/redis-8.0.0-py3-none-any.whl", hash = "sha256:c938c18338585009f0bc310f4c7e4e4b4d37639356c4ac072cedf3af570c8dc7", size = 499870, upload-time = "2026-05-28T12:45:11.697Z" },
 ]
 
+[[package]]
+name = "referencing"
+version = "0.37.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs" },
+    { name = "rpds-py" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
+]
+
 [[package]]
 name = "requests"
 version = "2.34.2"
@@ -1211,6 +1265,116 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a0/f4/c67b0b3f1b9245e8d266f0f112c500d50e5b4e83cb6f3b71b6528104182a/requests-2.34.2-py3-none-any.whl", hash = "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", size = 73075, upload-time = "2026-05-14T19:25:26.443Z" },
 ]
 
+[[package]]
+name = "rpds-py"
+version = "2026.5.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/2e/43/25a8dcd3feedd735039a8f0b5b7e3b118232b5eae288c4fd9ab200d41094/rpds_py-2026.5.1.tar.gz", hash = "sha256:07b24fea40541e28570e5b795a4a38fbdcd12550c06bd0748005ecc8116ca256", size = 64459, upload-time = "2026-05-28T12:02:13.232Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d4/e7/a78582dc57caa592dcc7d4fb69b61390561e908eb3d2f5df5928a8e354c0/rpds_py-2026.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3abe24a66e57adcfa645d718063a5fa5103ecc71ddbf26d78af8f9368018ff1d", size = 353040, upload-time = "2026-05-28T11:59:12.531Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/43/35e3f136343aef451e545ce8c38d36c2f93c0ed88703db8b64ba2b205c68/rpds_py-2026.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58b1d94308ddf0b1982f61f2eb54bf92997c9ece8a8093ef014250f4a517906c", size = 345775, upload-time = "2026-05-28T11:59:13.827Z" },
+    { url = "https://files.pythonhosted.org/packages/20/e1/0f2160c5982d3157734d5cb3ed63d8b2d583a73c9864f77b666449f32cf8/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa92420128dadce7f54bd73ba1825a273e9268fe9e35dbf7e6362890efa4e08", size = 376329, upload-time = "2026-05-28T11:59:15.271Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/11/ee0ba42aff83bf4effdbc576673c6be64c5e173978c3f6d537e94482f77d/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ca653c6546386227cd9800d1bef6a348099acf8db4250341da6d90f663d6dfcb", size = 383539, upload-time = "2026-05-28T11:59:16.665Z" },
+    { url = "https://files.pythonhosted.org/packages/11/df/d94aa6a499d4ac40afe2d7620f2c597fd3c0f182e854ad7cf3f596a81cb6/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66c93681c4729e4e3ecba31b8179fae083ff3118841672835140338b4b9867c1", size = 494674, upload-time = "2026-05-28T11:59:17.991Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/75/33d30f43bb2f458de11979486a591b1bf6e5651765ed1704c6197c2dc773/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40ff257542e04796880e011e15cd4dc21c2599975df2aaa8f2c8495ca574e1a5", size = 389268, upload-time = "2026-05-28T11:59:19.434Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/1e/2c9096fc19d5fd084b0184ca2b651e659aa0a37e6fdbecf6ece47f147fe1/rpds_py-2026.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6825cc329b290e93c5f6a9be2393118a763f6ccf6abd83704e0c102ca583644", size = 376280, upload-time = "2026-05-28T11:59:21Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/e5/61ec9f8be8211ea7f48448195549e4aaf02004083475493b0e137702ecb2/rpds_py-2026.5.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:de42116e69cb53b911cc34aee5ab98f36c597b822545045d49e938818b99e5e4", size = 387233, upload-time = "2026-05-28T11:59:22.454Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/ca/bcec1005c4f4a234f92a29078631fee49206c7265ccae966f18fd332e80e/rpds_py-2026.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0f920015df2a504bebaba6d4c31ccf3fcf942f92655c086da30b671aad19aa6", size = 405009, upload-time = "2026-05-28T11:59:23.845Z" },
+    { url = "https://files.pythonhosted.org/packages/72/e6/4d5718c5cf26c522dc7c9999e238da1e77380b81d0c5d1df11e271ddfeb1/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0408a24e44feb919423dc6d9da677cb5cddb894d2ca9e763967d156d9c60fab4", size = 553113, upload-time = "2026-05-28T11:59:25.184Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/25/2ee807bdb3e1f0b7eddf7782acd5665a8b5205a331a7d7244a52c4812fd9/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cea68bcd53467561ae2f96a6bdad1544299ba97b5b0ddcd5ac3d376e5c781c24", size = 618838, upload-time = "2026-05-28T11:59:26.749Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/c1/7d4c26f167f8c41501cc073d30ee22082b16ce358cf5b00ec97cbc7804ea/rpds_py-2026.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4be8b1d2a705cc37d08256004e1d07de143fa0075c8e85a3df020b776f62b732", size = 582436, upload-time = "2026-05-28T11:59:28.11Z" },
+    { url = "https://files.pythonhosted.org/packages/04/1d/9d12b0a337bab46f4769f8857f4007e3b2d639e14f9a44a0efe157696e64/rpds_py-2026.5.1-cp312-cp312-win32.whl", hash = "sha256:6736718bd4fc49cbcb538ba30516fdbef161522acefb739657d48b97bd864fed", size = 212734, upload-time = "2026-05-28T11:59:29.689Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/93/e4116f2de7f56bc7406a76033dc501811ddeb22b7f056b92d632871ebb0c/rpds_py-2026.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:0a7d1eec967df0e9b22614a5e177622e0c89611d03727fa0cb48e45028907870", size = 229045, upload-time = "2026-05-28T11:59:31.033Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/53/6c3419d85eb2ec5938a37627c585b42d76a63bb731d6e42ed4b079ebf486/rpds_py-2026.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:1841d067089e117142d79b98aa0df2f08b52f2ecc1819dd2700636c0db74a473", size = 223967, upload-time = "2026-05-28T11:59:32.318Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/32/14c961ad295f490eb0849ada8b79683e93a59b9de3afdd983eaf55fa6867/rpds_py-2026.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:efef4ac29c6ff495531eb17ee705b62841ecaa291b7c7077e848ea03e237164d", size = 352787, upload-time = "2026-05-28T11:59:33.655Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/bb/d1b85117967c11191441a7274ae616c65d93901d082c588f89a50a8da5ae/rpds_py-2026.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c39f5b67a8a2e67179ada2a954227d670fe65fa9098457f698f56ddf248709b3", size = 345179, upload-time = "2026-05-28T11:59:35Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/46/d84105f062e626a1b233f863907288a4708c2d833b8b4c6fb2764bc080c0/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5c30f3f04eef4fbd362226a6f31d7c8895ca4fbb6e0b790f6890a98d8da8559", size = 376173, upload-time = "2026-05-28T11:59:36.43Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/ae/469d7959ce5b1201e1de135dc735b86db3b35dd0d1734f6a44246d5f061c/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:277f6c82f0580848796c7ecc8a7173aa3bfb928e4ff831261c2f60a81dc270db", size = 383162, upload-time = "2026-05-28T11:59:37.995Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/a2/57853d31a1116a561aa072794602ad3f6341e18d70a8523f1bd5b9fc1e5a/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63c2c4c213f1a4e3f3de28ecab029dbdee976324e729c0d7a55211be72576b02", size = 495093, upload-time = "2026-05-28T11:59:39.453Z" },
+    { url = "https://files.pythonhosted.org/packages/99/63/3a8eabcad9314b7daf5c65f451d2c33d989235cd8a5762186cf2c3f5a4f8/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3350ec808fb538fe71a1f94dfaa0e29c598dfad805ce49f0caec5ae3183c652b", size = 389829, upload-time = "2026-05-28T11:59:40.896Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/25/05678d97fc25e2622df14dc530fb82023174ecfff6733991ed0d78f167bd/rpds_py-2026.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1b964e3ab599e718dc46c018d104b1ebc007cbc6567d827c94a687fca56d77e", size = 374786, upload-time = "2026-05-28T11:59:42.626Z" },
+    { url = "https://files.pythonhosted.org/packages/88/d1/8c90b6431e80a3b91b284a5c7c8c0c4f9c006444d90477a740d6e0f9c694/rpds_py-2026.5.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:19cb09fab7b7fc96b2a6e28f2e34b72a3705ff27b37edb77455316e5d3f3dc9b", size = 386920, upload-time = "2026-05-28T11:59:44.124Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/99/4638f672ab356682d633ee0da9255f5b67ce6efd0b85eb94ad3e255e65a5/rpds_py-2026.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:abe76bcdba31e576cb83eeb8797aa0d882b738fef6dc65d0601fc753806a5b46", size = 405059, upload-time = "2026-05-28T11:59:47.177Z" },
+    { url = "https://files.pythonhosted.org/packages/66/3f/3546524b6eb4cc2e1f363a3d638fa52f6c24faae3500c25fb488b02f1740/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8bff7073db3899158fff55ebf57b113a67030af26f80a18978f9f0aa60250ddf", size = 553030, upload-time = "2026-05-28T11:59:48.603Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/c3/7b3388c796fcf471bd17194242d4dc1a7608567c0fa422bcc1c5e79f9c1e/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8ba264fa49be666cd9cc56bf34ec7002fb3d27a4aee5bcb4d43d0d18feb1bb6f", size = 618975, upload-time = "2026-05-28T11:59:50.314Z" },
+    { url = "https://files.pythonhosted.org/packages/61/1e/a3cb07f2795075d1d88efddae2f541359fde5f08c81ee114c29c2949c90a/rpds_py-2026.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4860b603ddda0475a8885499b3729e90229d480105b42651962a5397d995fa89", size = 581178, upload-time = "2026-05-28T11:59:51.673Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/74/e758c03a5ef46f04c37f2651a2893db846d569ba8a7bca469d4b58939bcd/rpds_py-2026.5.1-cp313-cp313-win32.whl", hash = "sha256:7944270ae71383f6e2657dd7d5ce4eeb4ac2d0059a6738f0510583d462ab4842", size = 212481, upload-time = "2026-05-28T11:59:53.148Z" },
+    { url = "https://files.pythonhosted.org/packages/70/ec/a2aca432db9c7359b40fa393eeeaa0d166c2f70175be956e75fa24197c44/rpds_py-2026.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:88647f43a73c4e01be19b04ceef0c8d3a1958153604d13c773becd8016f2a0cf", size = 228519, upload-time = "2026-05-28T11:59:54.505Z" },
+    { url = "https://files.pythonhosted.org/packages/29/60/a73bfdd45b096574556acf303bbd9fa9eed36ca8a818b514e2a5d5fe2b9d/rpds_py-2026.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:453895624ecf7db7063b1004e44037522bbaef9ff6a945e59bc71662d7a03abd", size = 223446, upload-time = "2026-05-28T11:59:56.081Z" },
+    { url = "https://files.pythonhosted.org/packages/18/e2/408105fd611823f00882aea810f3989a30d26b1bab8b6beb20f98c724e0e/rpds_py-2026.5.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:b4e4bc98639ec915f512fde3aa7a95e0041d95d9c3cc86eea841fa63cb1e8600", size = 355287, upload-time = "2026-05-28T11:59:57.448Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/58/5c4a43436843c90d0f6d19f82c200c80e3843ca9fa07b237623327f6d384/rpds_py-2026.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cacedb7a6e167680acba45ad5716e89067d225dc80da0d7040cae8c81d4572fa", size = 347033, upload-time = "2026-05-28T11:59:58.881Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/c2/1a71acdacaf4e259b10278fb87b039ded3cf80041bcd89dd8a3ea702ded6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68700371c5d7ae1412862ddfa719090925c93ecf351c566d66f09d04b136ea00", size = 376891, upload-time = "2026-05-28T12:00:00.516Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/c8/535f3d9b65addd8e28aa87b83c6e526799c3717a88273db8ea795beeef7a/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:296c799becfa849c779c8725494fe9ed94959ed886787df4364b058465bad7f0", size = 385646, upload-time = "2026-05-28T12:00:02.394Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/91/dc033f313345c354ade914dbe73cdb90b615a4409ea02430d5356794f3d8/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d3858b908218ee108d0bbfb2095ccc237648053c9bf98affad7cb079acaf1d97", size = 498830, upload-time = "2026-05-28T12:00:04.189Z" },
+    { url = "https://files.pythonhosted.org/packages/27/fc/90fcbea459dbb8ddc18a2e0fd1de9412b48bc84ffff2db771cf714bacfd6/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4fb8d2e7cb2f850b169806d61d1b991738acec96500a75c30f49caf064ce7cef", size = 392830, upload-time = "2026-05-28T12:00:05.797Z" },
+    { url = "https://files.pythonhosted.org/packages/b2/1d/46cd11a228c9750684a798d98f878be6f614aa762438da7378f035e79e35/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27b74c10ed6a8f190f4287f53bcfea348b92a84a9c9f70d30183d1e6172d580d", size = 379613, upload-time = "2026-05-28T12:00:07.433Z" },
+    { url = "https://files.pythonhosted.org/packages/24/4a/d9b0c6af3a1de03eb93741bbe8be2bdce84d8fda8224f3005451d86df389/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:b9a6528956191c48c52294a592dbd4a8386d7048bdb25c0efcb6b966466c6d83", size = 388183, upload-time = "2026-05-28T12:00:09.227Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/b4/db7aaabdda6d020afc87d981bcc2f57a434c7dec60ecfc2ab3dd50b20351/rpds_py-2026.5.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af03e34e860047bc7a352b842856fcf78798fbb81132cc98bd2f907ab4eb9cd2", size = 408578, upload-time = "2026-05-28T12:00:10.779Z" },
+    { url = "https://files.pythonhosted.org/packages/08/d6/070f6a41cbb343e2ac4171859bf3f3623e0ab002f72619d6d505313ec2de/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fea6e836d10abbe191d557d33bd58bd5987725fe63aa1eefe557d230209855bd", size = 553573, upload-time = "2026-05-28T12:00:12.443Z" },
+    { url = "https://files.pythonhosted.org/packages/75/ab/1a71ea3589c4345dac0a0518f0e6a031cb42689277851b683c46d27463a5/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:fc0c0f878ea770a0a8a462456c5ad36fc9fe6358e6b76fdadc7f17575e0b8bf1", size = 620861, upload-time = "2026-05-28T12:00:14.09Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/22/9bf80a56069c0c443fcfefac639a86a744550a2898817a6dfd3e26654924/rpds_py-2026.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e0b360f316d966b048b085857630b3cc51f3db2f07b06f440eac8f695374d1e3", size = 585633, upload-time = "2026-05-28T12:00:15.66Z" },
+    { url = "https://files.pythonhosted.org/packages/da/68/3b2c0a75c9e04125696f84ebdbbf304acf5a40b58ba4481cdb98a922c3ba/rpds_py-2026.5.1-cp313-cp313t-win32.whl", hash = "sha256:a2999883eedf72fdfb7520b92c7d4ec2572a71ff40239377aa604cc529eecafc", size = 210074, upload-time = "2026-05-28T12:00:17.291Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/8b/609157d5a25d37d4f29f92840ba531f416907c34ae5c5739dd21fc2bef98/rpds_py-2026.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e07be2a9d7122bd6e82dea89814ef8dc893feb1aae97fec1630f3263bbb30e55", size = 228635, upload-time = "2026-05-28T12:00:18.73Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/6f/19c1918a4b590d8de87e712e4abe4b3875771eff60216fb6153cf6665c68/rpds_py-2026.5.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:1f2c391c3059798093b65df23aca2cac150460ae9c630d99dec83d703d9485b9", size = 349756, upload-time = "2026-05-28T12:00:20.217Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/60/a06fe7da34eca79dacbf958a2ba0c6eea85bc2b29de20080bf40f72f66fa/rpds_py-2026.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:413b424f7c4ee65ab5e5be91f5731be0f8b41a1ee2b12dfe810d716312e95a78", size = 343831, upload-time = "2026-05-28T12:00:21.711Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/ec/b2333b97b90e2a6ef6ca8ad386ee284968e74bcfe113b3f1a8d9036429a9/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c595a1d9255dce0599e13130d1440ab2506654f2b50294226ee06402f8fef63", size = 375127, upload-time = "2026-05-28T12:00:23.326Z" },
+    { url = "https://files.pythonhosted.org/packages/14/7f/e00aae54067f2b488c4637961d5f58204d470795fc791085fa3f15060d2e/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c27c5f6102eac8c03e7595a00827a53b271ba40a53b59ff8709170e0855ea4a", size = 379034, upload-time = "2026-05-28T12:00:24.89Z" },
+    { url = "https://files.pythonhosted.org/packages/be/cc/423999bbb8ae8dc93c77fc1d5e984ade5eb89d237d3bb884ccfa72ae2890/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c7fcf61d44cacecaf3aea542b0e053db77972a4573e7ceda16fb2b399161195", size = 490823, upload-time = "2026-05-28T12:00:26.676Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/aa/c671bf660f12e68d3c52ff86c7066ed1372df5a0f4f2ff584e419b8207e7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c817a189d4ee14290420e5ff051e4dd6baa13f3edf84685071dee07a6d538ee", size = 388144, upload-time = "2026-05-28T12:00:28.577Z" },
+    { url = "https://files.pythonhosted.org/packages/19/c8/d63bb75b68afe77b229e3021c6031bcaf01da5db5b0e69d0d10f9ba679a7/rpds_py-2026.5.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21846aac0ed2e0589f38c12dc44e77bb64e494b771eadbcf169cba00566ba7ba", size = 371959, upload-time = "2026-05-28T12:00:30.304Z" },
+    { url = "https://files.pythonhosted.org/packages/82/35/c51122014d8274ff37dc606d60049c3db7d83da02b5b282511e5a906a9a6/rpds_py-2026.5.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b317c87a13f769a4e787819bd508aaa5d69aa09b0880de9af6d3a8a54571cdec", size = 383558, upload-time = "2026-05-28T12:00:31.764Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/f9/2790cb99c136a5363acdeacf5c27c56f3de0d4118a1f48fca83404c99c89/rpds_py-2026.5.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce87129d9f2c14fa6c4a8601fb80eb4488c80d38a20cd13758ef11123e14995d", size = 402789, upload-time = "2026-05-28T12:00:33.247Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/1b/e4fb584f8c75d35c38150ff6a332cda949e6f97acba1f4fd123b14ab56fe/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9cdddb6c1207d284d94fd1530adf57fbd797fe7c4b8704ba85f49414f2557e7d", size = 551405, upload-time = "2026-05-28T12:00:34.819Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/f7/a6731b4216cb3793ea1af5391da240f5683dacc0d13e034fe5fc3503f240/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4e237e139f94d3c036fd28eb9f564c99055476ff4ff05cd42be55ce349b5aa02", size = 616975, upload-time = "2026-05-28T12:00:36.268Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ea/2e051a81d95d8e63f4b35a1c463a87e8766bc3d083c067c5dfb6bf220747/rpds_py-2026.5.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ed0954b524873214369184a9c82b0eaa45a3fbb9a798cd95b17e0d98499e7ea0", size = 578701, upload-time = "2026-05-28T12:00:37.82Z" },
+    { url = "https://files.pythonhosted.org/packages/65/56/b5f6fdb2083e32bca8a8993d89e70db114b4756c9e2c38421328126689d2/rpds_py-2026.5.1-cp314-cp314-win32.whl", hash = "sha256:2d88621d6a7d4dfa633d21abe90f280bb205274e16b1d1e61c6ad4640b2453b7", size = 209806, upload-time = "2026-05-28T12:00:39.492Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/80/65a5aa96c155e611d1ed844e4e1f57f3e36b021f396d9f8585d756e6b90d/rpds_py-2026.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:cef8ac28d26f4dda3533060c20fbf80a325458fa9fd23ea72a73cdfa8e978838", size = 225985, upload-time = "2026-05-28T12:00:40.94Z" },
+    { url = "https://files.pythonhosted.org/packages/27/7c/ad185212e87b05f196daef92bc5f3caf07298eb47c295b5585c3dd3093ac/rpds_py-2026.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:eaaea962c68cdc68d4a533ba985ab8e9484277910bbfaa2ab3ef7732667bfed8", size = 221219, upload-time = "2026-05-28T12:00:43.15Z" },
+    { url = "https://files.pythonhosted.org/packages/23/58/e14ae18759020334646b031e708ab4158d653a938822bfb7b95ef2e93aa3/rpds_py-2026.5.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:21942f52dbbd5f8758bf021213d28bd45c39e873e65e2407faf5f1846f5761ad", size = 352148, upload-time = "2026-05-28T12:00:44.638Z" },
+    { url = "https://files.pythonhosted.org/packages/31/9b/5f4a1e2f960bca3ac5d052b139dd31eed97b259f9d909173821760d542e8/rpds_py-2026.5.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f414556f6e3958300ff941e40c9f97e3dc9774ddd1b3434c475d73dd354bbed3", size = 345196, upload-time = "2026-05-28T12:00:46.14Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/71/1d9574d6a2fa20ab60eaa55c7467f5aa20cbc770f341a05f09c0876f59e2/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1013a8625c74043210190b246f5b1551e09757c1f356c6e4160ef96c5bc081", size = 374981, upload-time = "2026-05-28T12:00:47.531Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/9a/37e99f4915a80aa71670263c1267f7ae0af95f53a3f61e6c3bdc016d4515/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cc68e231a77a5f0d774ae278a1f8e55c0456501820847c1e4efb3829f3441df6", size = 379961, upload-time = "2026-05-28T12:00:49.216Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/ff/6e73f74b89d2e0715e0fc86b7dde893f9a61ae2f9b256ff3bdfe41ac4e94/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9baffb505aff33acc69b422a19f77806680f3c8632227d79f48de8a810d1c2c5", size = 495965, upload-time = "2026-05-28T12:00:51.111Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/e0/425faba25f59d74d4638b267f7c7a80e8649d2ef4db10a19b0c4a71e6e6f/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8d2f912928d426e8cfa396f7f3f8d29a59e6689c86dcca3c420730c1096322b", size = 389526, upload-time = "2026-05-28T12:00:52.77Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/76/7a41960e3fddae47fab43a28684d5da981401dffd88253de0944148654cb/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f628283be835db980c941767d41c9a27b5239e54ba0a9c1335247e82406964", size = 376190, upload-time = "2026-05-28T12:00:54.215Z" },
+    { url = "https://files.pythonhosted.org/packages/27/60/5f38dc70824fc6951b51d35377e577a3a3a4c81a6769cc5a2de25ebe0ad1/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:1ebb2f0ab7e16132995a72de805170e0203df0c3dd22e1ef1cd1fdd90bd7a131", size = 383921, upload-time = "2026-05-28T12:00:55.673Z" },
+    { url = "https://files.pythonhosted.org/packages/60/1a/d60a38caa1505f4b9483c3fbbde12c94e1079154f4f401a6da96f7e77621/rpds_py-2026.5.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f3df3d16ded76f1f8c9cdebd0e1ea55fdf4c23b812de189814da7cf229c22a81", size = 404766, upload-time = "2026-05-28T12:00:57.518Z" },
+    { url = "https://files.pythonhosted.org/packages/87/ff/602fd3f174d6425f0bce05ad0dfbec0e96b38d0f7d08a79af5aa20083885/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9af8905b8f854990e40d5206aa5ac58d9b0fe0b7f351ff2bb086c20f6c8c6a47", size = 551343, upload-time = "2026-05-28T12:00:58.978Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/c1/1be13327acdbead3eca1fde03b6a34dbb011f1e864e217f0d32cc1779a7f/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:036a36a87fb1cd3b214d11c4b3c4f7d2ddad933625dca1c900b56a057c07740a", size = 618502, upload-time = "2026-05-28T12:01:00.656Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/d7/afb49b49d7f2be8b7ba1a9f0977fa5168003437b93086726f066544e8351/rpds_py-2026.5.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:62ae3853454fe9ef283a03c96c2d835d39e84b14643a9d62c82ef0fb87d702ca", size = 581916, upload-time = "2026-05-28T12:01:02.22Z" },
+    { url = "https://files.pythonhosted.org/packages/25/d1/dbef8c1f8a10f07beb62b5f054e20099fd9924b3ec001b8f0b6ac7813a85/rpds_py-2026.5.1-cp314-cp314t-win32.whl", hash = "sha256:6c3d771a46ec18b12af06ce36243a9a80b07a5d0515236332d90863ca8bb326a", size = 207855, upload-time = "2026-05-28T12:01:03.821Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/72/bfa4e61ab8e7dc1c8adf397e05e6cbdd4239357bd72b248d3de662f23915/rpds_py-2026.5.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c93c629be4636cf54337bd5f06c104d55e42ced54d681f6fe21ae510a65116f6", size = 225422, upload-time = "2026-05-28T12:01:05.194Z" },
+    { url = "https://files.pythonhosted.org/packages/27/3a/7b5da92b640f67b6717ccafc83cdd06bfa7ff2395c3685c68922bb54d703/rpds_py-2026.5.1-cp315-cp315-macosx_10_12_x86_64.whl", hash = "sha256:3574b55c604b8f75dacb007136508bbc0db406e626301778096a133327e7f2fb", size = 349576, upload-time = "2026-05-28T12:01:06.722Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/8a/2aafd7ad355a1bd48ca76e2262b74b15e6432b5a1efe150efd4d779cd55d/rpds_py-2026.5.1-cp315-cp315-macosx_11_0_arm64.whl", hash = "sha256:94068eb3ae6d43f5a786b7db96a406a34e6d5c24489feef32fd6e8946ea7b291", size = 343640, upload-time = "2026-05-28T12:01:08.441Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/7d/6c9523c1abbe840a1b7fba3c516d48e1d3487cc80fea4366c4071cf56784/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a5b10e8ce894825f380a8f1b6444cf73c294dfea62afbb2d13e3a9e630cec1", size = 375322, upload-time = "2026-05-28T12:01:09.934Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/5d/0b7b03fb1dc509321f01de3149784ab773e34c8573022029af8076afcb9c/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc09f82e63d4bcd58149572f857a431bae851dc747e313c3b5bdf7abb907fda8", size = 379066, upload-time = "2026-05-28T12:01:11.48Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/e2/8ef6012999ebf1cb1c22f876d9ce5e63d960fd4631d2af3202d3f480aa25/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e10464d17df3b582745c25cec695cb9558bca2cb6ddb631aee1787fc72c767b2", size = 494586, upload-time = "2026-05-28T12:01:13.051Z" },
+    { url = "https://files.pythonhosted.org/packages/80/af/1eeb029bec67582c226b7809172207cd005073af4ebd906e65ff494f4983/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ba05adbf15d994c38ec0b7ab32e858e5110c21e9009a00a86545fd220f84e038", size = 388415, upload-time = "2026-05-28T12:01:14.631Z" },
+    { url = "https://files.pythonhosted.org/packages/18/23/ffbe10711c4d766c1cab0557d6906c074f795814863c67b351355d29354a/rpds_py-2026.5.1-cp315-cp315-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77c004fdc7b891967106f78ddfd7b076bfe6813c6139c6fff6aed3bcaa960b26", size = 372427, upload-time = "2026-05-28T12:01:16.153Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/3a/30ba4a6ad457e5b070c18d742a33fb77d8d922b565cc881f8a5313d63bfe/rpds_py-2026.5.1-cp315-cp315-manylinux_2_31_riscv64.whl", hash = "sha256:83bcf894486c9d78dd290d3c0124ff6dd8875d3025e2090a8ec49fcc37c55fdd", size = 383615, upload-time = "2026-05-28T12:01:17.809Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/69/62e242b53ce39c0814bd24e1a6e6eba6c92be716277745f317f9540a2e7b/rpds_py-2026.5.1-cp315-cp315-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3df104083952a0e0c6f10de33e440eabe98fb6317d23e1a58c68f6df08d01b9", size = 402786, upload-time = "2026-05-28T12:01:19.419Z" },
+    { url = "https://files.pythonhosted.org/packages/38/c1/a770b9c186928a1ed0f7e6d7ae50e7f3950ed23e3f9e366dbc8e38cb55de/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_aarch64.whl", hash = "sha256:980450826cf22e133c57e0835070bdd0dd3f73b9b708c3ce223def2cb9469e14", size = 551583, upload-time = "2026-05-28T12:01:21.013Z" },
+    { url = "https://files.pythonhosted.org/packages/21/7c/68e8579b95375b70d2a963103c42e705856cdb98569258bd807f4423891c/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_i686.whl", hash = "sha256:205dde846f24332ab0c1188699a043b8d165b79bb84529ce272c45048ff6be01", size = 616941, upload-time = "2026-05-28T12:01:22.548Z" },
+    { url = "https://files.pythonhosted.org/packages/70/a1/a6135aed5730ff03ab957182259987ac11e55fb392a28dc6f0592048a280/rpds_py-2026.5.1-cp315-cp315-musllinux_1_2_x86_64.whl", hash = "sha256:3966b82dd563176396df030f3dd52a6e54cb69b718e95e78bd555ed3d1e0185d", size = 578349, upload-time = "2026-05-28T12:01:24.118Z" },
+    { url = "https://files.pythonhosted.org/packages/09/6e/f24201a76a84e6c49d0bdfdfcb735210e21701e9b21c5bfc0ba497dd62f6/rpds_py-2026.5.1-cp315-cp315-win32.whl", hash = "sha256:7818f8d0a415be74d2be3590b0a1c1f463a642f4d0217e7d10602dceef5b79aa", size = 209922, upload-time = "2026-05-28T12:01:25.522Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/e4/966bc240bb0485fc265278f6de44d05834bf0b3618886e0b22e33d54c49a/rpds_py-2026.5.1-cp315-cp315-win_amd64.whl", hash = "sha256:b3cc20c0d800af78fd0fac68086e28c1856cec51ea528bb81ea851aa40d39325", size = 226003, upload-time = "2026-05-28T12:01:27.062Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/5c/a15a59269cd5e74472734516c73795c15eccfc841b3d4b0228c3f53f19d0/rpds_py-2026.5.1-cp315-cp315-win_arm64.whl", hash = "sha256:3609e9939a8a76cd904cf98a3f1f13b5dc7e150adeaee89e0ea09652ea213e16", size = 221245, upload-time = "2026-05-28T12:01:28.51Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/22/135ce03804e179a71ceb13be095deda4a279bc88f7a6b8fa161c5ad44e12/rpds_py-2026.5.1-cp315-cp315t-macosx_10_12_x86_64.whl", hash = "sha256:5d333a7127d4b307601ac37792bee01bb95c867cbfacf21b6375b804d6bbd723", size = 352015, upload-time = "2026-05-28T12:01:30.214Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/5f/f1f6d2652eb9d848f6eb369d8db83a2da6249bb49ad2c2a48f45d54538d3/rpds_py-2026.5.1-cp315-cp315t-macosx_11_0_arm64.whl", hash = "sha256:b5f077b44a4f7808520f66dae234988d867deb9aed9be5da057ce9ba831b2a41", size = 345016, upload-time = "2026-05-28T12:01:31.656Z" },
+    { url = "https://files.pythonhosted.org/packages/88/66/b74182775691ea2290c99e52ac8d5db844e56fbec90ce421f107658c8314/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d8f9b7b78c9538fc9e04e82ec0e888ff0c3cffcfad152c77e57cd09351a98a", size = 374775, upload-time = "2026-05-28T12:01:33.136Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/8f/15e5a61d9f0a43902d36561d4f07cae6ae9f4716be825159fd72717f33af/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e3a8ae58895ac107ed934a6bf51e5846f95c53b9b940c2c6d310838fd5846358", size = 380270, upload-time = "2026-05-28T12:01:34.574Z" },
+    { url = "https://files.pythonhosted.org/packages/02/c3/f859b12763a80540cdf2af0f15b19904cf756a71d7bdd3f82ff3e5b1bbf9/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0957cf3c2b8632ec7aaebffebea8005b353cc2a237b6e2ae3c2cac0820704cfb", size = 495285, upload-time = "2026-05-28T12:01:36.127Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/c7/ff27c2ac8411d30b03b1829fd88cae8dad1a4d0da48dd25e57c4038042e6/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c396c1304de421050b3681ea70f371874b54d41b0151e96109758144c231e30b", size = 389581, upload-time = "2026-05-28T12:01:37.635Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/67/fe92ee32a6cc05c77228a2f8b1762e7124f386ec20ff83d0757b762d58d0/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad1bff7f666b9598e573815affd666aac6a13a585dde336f843e33350c7fadc", size = 376041, upload-time = "2026-05-28T12:01:39.307Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/91/b4d6685c27aba55bd82f25b278be8237038117d05f9659a6213ad3408130/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_31_riscv64.whl", hash = "sha256:656a042550878f12d45752452d47094b7cfe5ad1e9d7b87b5a22ad3ae5ff8015", size = 383946, upload-time = "2026-05-28T12:01:41.043Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/79/2c1d832a53c8e0f8e98fc970ec257b950fecd4f62be2ab7182b500a0cbc8/rpds_py-2026.5.1-cp315-cp315t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c4bd4f70294737b5206a3e8e30ccadbf8a60301831c8ea23eec5dbeea1ecfa", size = 405526, upload-time = "2026-05-28T12:01:43.032Z" },
+    { url = "https://files.pythonhosted.org/packages/78/c4/c98117b03c6a8581ab2c2dfccfe9a5ad82bd8128a3c28b46a6ad2d97c393/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_aarch64.whl", hash = "sha256:43bca78665423cabae77146f2fe7ce55272b6c8d55d82cca83effd42c7e13972", size = 551165, upload-time = "2026-05-28T12:01:44.648Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/c1/bc479ca069200af730881b1bd525e3114b2b391a351509fcb1b772f28086/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_i686.whl", hash = "sha256:42d0f20e85e549c870749d0e247f0c10d318a45b7e9676d575d2dcb04a1b2e66", size = 618778, upload-time = "2026-05-28T12:01:46.337Z" },
+    { url = "https://files.pythonhosted.org/packages/77/65/38ab2f90df44c2febfb63cc10ced40763d9b4bc94d173e734528663fe7f5/rpds_py-2026.5.1-cp315-cp315t-musllinux_1_2_x86_64.whl", hash = "sha256:b1be5c35683684d5331b93600c210e8367c254683d8a6df6bd21bd2da3a334fb", size = 581839, upload-time = "2026-05-28T12:01:48.109Z" },
+    { url = "https://files.pythonhosted.org/packages/15/2d/ce1f605fe036aadd460e5822e578c6c7ec3a860936cca37d6e0f299daa77/rpds_py-2026.5.1-cp315-cp315t-win32.whl", hash = "sha256:75808f6c38ce7749bb68cc2770161aae5045e6c6f6781a9782e74b93304399df", size = 207866, upload-time = "2026-05-28T12:01:49.648Z" },
+    { url = "https://files.pythonhosted.org/packages/79/cb/966040123eb102371559746908ef2c9471f4d43e17ec9a645a2258dab64b/rpds_py-2026.5.1-cp315-cp315t-win_amd64.whl", hash = "sha256:90bd6630002a1c7f09e7843dd79f0d24f3d2897cc25a753480917865d14f15b3", size = 225441, upload-time = "2026-05-28T12:01:51.408Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
diff --git a/pipeline/workflow/spanner-ingestion-workflow.yaml b/pipeline/workflow/spanner-ingestion-workflow.yaml
index d0a878067..4bc2d5c6e 100644
--- a/pipeline/workflow/spanner-ingestion-workflow.yaml
+++ b/pipeline/workflow/spanner-ingestion-workflow.yaml
@@ -121,38 +121,52 @@ main:
 run_aggregation_job:
   params: [import_list, helper_url]
   steps:
-    - run_aggregation:
+    # 1. Start the aggregation and get the initial state
+    - initiate_aggregation:
         call: http.post
         args:
-          url: ${helper_url + "/aggregation/run"}
+          url: ${helper_url + "/aggregation/initiate"}
           timeout: 300
           auth:
             type: OIDC
           body:
             importList: ${import_list}
-        result: aggregation_response
-    - check_aggregation_status_loop:
+        result: initiate_response
+    # 2. Store the state in a workflow variable
+    - assign_state:
+        assign:
+          - state: ${initiate_response.body}
+    # 3. State Check loop
+    - check_status_loop:
+        switch:
+          # Exit successfully if done
+          - condition: ${state.status == "SUCCEEDED"}
+            return: "OK"
+          # Raise error if failed
+          - condition: ${state.status == "FAILED"}
+            raise: ${state.error}
+        next: poll_and_wait
+    # 4. Sleep and Poll
+    - poll_and_wait:
         steps:
-          - wait_for_aggregation:
+          - wait_step:
               call: sys.sleep
               args:
                 seconds: 300
-          - check_aggregation_status:
+          # Pass the state back to the server, get the new state
+          - poll_server:
               call: http.post
               args:
-                url: ${helper_url + "/aggregation/status"}
+                url: ${helper_url + "/aggregation/poll"}
                 auth:
                   type: OIDC
-                body:
-                  jobIds: ${aggregation_response.body.jobIds}
-              result: aggregation_status_response
-          - evaluate_aggregation_status:
-              switch:
-                - condition: ${aggregation_status_response.body.status == "DONE"}
-                  return: 'OK'
-                - condition: ${aggregation_status_response.body.status == "FAILED"}
-                  raise: ${aggregation_status_response.body.error}
-              next: check_aggregation_status_loop
+                body: ${state}
+              result: poll_response
+          - update_state:
+              assign:
+                - state: ${poll_response.body}
+        next: check_status_loop
+
 
 # This sub-workflow launches a Dataflow job and waits for it to complete.
 run_dataflow_job:

From d2e3128e3b6bca95090c076be3d6bc66d4823749 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:18:09 +0530
Subject: [PATCH 02/33] fix(aggregation): handle PENDING state in polling and
 short-circuit empty job list

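- Fixed a bug where a PENDING BigQuery job could cause a premature transition to the next stage: only "RUNNING" was treated as still executing, so any other non-FAILED status fell through to the transition path.
- Changed the transition check to strictly require a DONE status.
- Added short-circuiting for empty active job lists: an empty list is treated as DONE without querying BigQuery.
- Added a new integration test 'test_aggregation_poll_still_running' to verify PENDING handling.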
---
 .../workflow/ingestion-helper/app_test.py     | 28 +++++++++++++++++++
 .../ingestion-helper/routes/aggregation.py    | 13 +++++----
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index d19fdfb7e..12862c5dc 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -315,6 +315,34 @@ def test_aggregation_poll_transition(self, mock_aggregation_utils):
         self.assertEqual(state["current_stage"], 2) # Transitioned to 2!
         self.assertEqual(state["active_job_ids"], ["job-stage2-1"])
 
+    @patch('routes.aggregation._get_orchestrator')
+    def test_aggregation_poll_still_running(self, mock_aggregation_utils):
+        # Setup mock orchestrator to simulate jobs still in PENDING state
+        mock_instance = MagicMock()
+        mock_aggregation_utils.return_value = mock_instance
+        
+        # Mock BQ reporting Stage 1 jobs are PENDING (still executing)
+        mock_instance.check_jobs_status.return_value = {"status": "PENDING"}
+
+        # Input state
+        payload = {
+            "status": "RUNNING",
+            "current_stage": 1,
+            "active_job_ids": ["job-1", "job-2"],
+            "import_list": [{"importName": "USFed_Census"}]
+        }
+        
+        # Call endpoint
+        response = client.post("/aggregation/poll", json=payload)
+
+        # Assertions
+        self.assertEqual(response.status_code, 200)
+        state = response.json()
+        # Verify state is returned unchanged
+        self.assertEqual(state["status"], "RUNNING")
+        self.assertEqual(state["current_stage"], 1)
+        self.assertEqual(state["active_job_ids"], ["job-1", "job-2"])
+
     @patch('routes.aggregation._get_orchestrator')
     def test_aggregation_legacy_run(self, mock_aggregation_utils):
         # Setup mock orchestrator
diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index ce7434763..5f1f59e90 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -128,8 +128,11 @@ def poll_aggregation(state: StateObject):
         import_names = [item.get('importName') for item in state.import_list if item.get('importName')]
 
         # 1. Check status of active jobs in BigQuery
-        logging.info(f"Polling status for jobs in Stage {state.current_stage}: {state.active_job_ids}")
-        bq_status = orchestrator.check_jobs_status(state.active_job_ids)
+        if not state.active_job_ids:
+            bq_status = {"status": "DONE"}
+        else:
+            logging.info(f"Polling status for jobs in Stage {state.current_stage}: {state.active_job_ids}")
+            bq_status = orchestrator.check_jobs_status(state.active_job_ids)
         
         # Case A: Any job failed
         if bq_status["status"] == "FAILED":
@@ -142,9 +145,9 @@ def poll_aggregation(state: StateObject):
                 error=bq_status.get("error")
             )
             
-        # Case B: Jobs are still running
-        if bq_status["status"] == "RUNNING":
-            logging.info(f"Stage {state.current_stage} is still executing.")
+        # Case B: Jobs are still executing (explicitly check for DONE to transition)
+        if bq_status["status"] != "DONE":
+            logging.info(f"Stage {state.current_stage} is still executing (status: {bq_status['status']}).")
             return state # Return unchanged
             
         # Case C: All jobs succeeded -> Find and execute the next active stage

From 43f3c56fe2987b2a4c786b13d746a86e7a14bbf2 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:20:29 +0530
Subject: [PATCH 03/33] feat(aggregation): calculate max_stage dynamically from
 configuration

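- Replaced the hardcoded stage upper bounds (`<= 10` in the initiate and poll routes, `range(1, 10)` in the legacy /run wrapper) with a computed limit.
- The maximum stage is now calculated dynamically from the loaded aggregations config, defaulting to 1 when no aggregations are configured.
- Updated integration tests in app_test.py to mock the aggregations list so that the stage limit is evaluated correctly.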
---
 pipeline/workflow/ingestion-helper/app_test.py           | 2 ++
 pipeline/workflow/ingestion-helper/routes/aggregation.py | 9 ++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index 12862c5dc..f82003676 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -290,6 +290,7 @@ def test_aggregation_poll_transition(self, mock_aggregation_utils):
         # Setup mock orchestrator to simulate Stage 1 completion and Stage 2 execution
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
+        mock_instance.aggregations = [{"stage": 1}, {"stage": 2}]
         
         # Mock BQ reporting Stage 1 jobs are DONE
         mock_instance.check_jobs_status.return_value = {"status": "DONE"}
@@ -348,6 +349,7 @@ def test_aggregation_legacy_run(self, mock_aggregation_utils):
         # Setup mock orchestrator
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
+        mock_instance.aggregations = [{"stage": 1}, {"stage": 2}]
         mock_instance.has_stage.side_effect = lambda stage, imports: stage in [1, 2]
         mock_instance.execute_stage.side_effect = lambda stage, imports: [f"job-stage{stage}-1"]
 
diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index 5f1f59e90..f28204df9 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -94,7 +94,8 @@ def initiate_aggregation(req: InitiateRequest):
         
         # Find the first stage that has active aggregations (usually Stage 1)
         first_stage = 1
-        while first_stage <= 10: # Arbitrary upper limit for safety
+        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
+        while first_stage <= max_stage:
             if orchestrator.has_stage(first_stage, import_names):
                 break
             first_stage += 1
@@ -152,7 +153,8 @@ def poll_aggregation(state: StateObject):
             
         # Case C: All jobs succeeded -> Find and execute the next active stage
         next_stage = state.current_stage + 1
-        while next_stage <= 10: # Arbitrary upper limit
+        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
+        while next_stage <= max_stage:
             if orchestrator.has_stage(next_stage, import_names):
                 logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
                 new_job_ids = orchestrator.execute_stage(next_stage, import_names)
@@ -201,7 +203,8 @@ def run_aggregation_legacy(req: LegacyAggregationRequest):
         
         # Compatibility Mode: Submit ALL enabled stages in parallel
         job_ids = []
-        for stage_num in range(1, 10):
+        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
+        for stage_num in range(1, max_stage + 1):
             if orchestrator.has_stage(stage_num, import_names):
                 job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
                 

From ce20588fa506801a1bc0591f9120ee4c13136b1b Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:22:37 +0530
Subject: [PATCH 04/33] style(logging): follow logging best practices and
 remove module-level side effects

- Removed module-level logging.basicConfig call from validator.py.
- Added logging.basicConfig inside validator's CLI main() function to ensure logging is configured only during standalone script execution.
- Removed global logging.getLogger().setLevel(logging.INFO) call from orchestrator.py to prevent altering root logger levels on module import.
---
 pipeline/workflow/ingestion-helper/aggregation/orchestrator.py | 2 --
 pipeline/workflow/ingestion-helper/aggregation/validator.py    | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index db823a8b4..a3fe88efc 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -24,8 +24,6 @@
 from .stat_var_group_generator import StatVarGroupGenerator
 from .validator import validate_config
 
-logging.getLogger().setLevel(logging.INFO)
-
 
 class AggregationOrchestrator:
     """Orchestrates the overall aggregation workflow."""
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator.py b/pipeline/workflow/ingestion-helper/aggregation/validator.py
index e1ac6d2e1..a2ac70a2d 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator.py
@@ -23,8 +23,6 @@
 import yaml
 import jsonschema
 
-logging.basicConfig(level=logging.INFO)
-
 # ANSI escape codes for colored terminal output
 GREEN = "\033[92m"
 RED = "\033[91m"
@@ -83,6 +81,7 @@ def validate_config(config_file_path: str, schema_file_path: str) -> List[Dict[s
 
 def main():
     """CLI entry point for standalone configuration validation."""
+    logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(description="Validate Data Commons aggregation configuration files against the JSON Schema.")
     
     # Resolve default paths relative to this script's directory (aggregation/)

From 4ab64686cfc596412c8fa0401cc3a355f59810c4 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:45:50 +0530
Subject: [PATCH 05/33] fix(aggregation): fail-fast on unsupported step types
 and fix validator schema bypass

- Added an else block in orchestrator's execute_stage() that raises ValueError for unsupported/unimplemented step types (like 'entity'), preventing silent failures.
- Removed the early-return validation bypass in validator.py, ensuring that jsonschema strictly validates missing 'aggregations' keys.
- Added unit test 'test_execute_stage_unsupported_type' in orchestrator_test.py to verify the ValueError fail-fast routing.
- Added unit tests 'test_validate_config_missing_aggregations_key' and 'test_validate_config_empty_file' in validator_test.py to cover the validation fixes.
---
 .../aggregation/orchestrator.py               |  2 ++
 .../aggregation/orchestrator_test.py          | 23 +++++++++++++++++--
 .../ingestion-helper/aggregation/validator.py |  5 ++--
 .../aggregation/validator_test.py             | 20 ++++++++++++++++
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index a3fe88efc..0e7e0a133 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -135,6 +135,8 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
 
             elif step_type == "stat_var_groups":
                 step_jobs.extend(self.stat_var_group_generator.run_all(applicable_imports))
+            else:
+                raise ValueError(f"Unsupported or unimplemented aggregation step type: {step_type}")
 
             # Collect BQ jobs
             for job in step_jobs:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 4301944c5..64a4eefe3 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -53,8 +53,6 @@
 """
 
 
-
-
 @patch('aggregation.orchestrator.BigQueryExecutor')
 @patch('aggregation.orchestrator.PlaceAggregationGenerator')
 @patch('aggregation.orchestrator.StatVarAggregator')
@@ -172,6 +170,27 @@ def test_execute_stage_2_with_disabled_and_filtering(self, mock_file_open,
             skip_all_sources_present_check=True
         )
 
+    @patch('builtins.open')
+    def test_execute_stage_unsupported_type(self, mock_file_open, *mocks):
+        """Tests that an unsupported aggregation step type raises ValueError."""
+        # Use 'entity' which is valid in schema but unimplemented in orchestrator
+        unimplemented_config = """
+        aggregations:
+          - type: entity
+            entity_types: ["MortalityEvent"]
+            location_props: ["location"]
+            imports: ["*"]
+            stage: 1
+        """
+        mock_file_open.side_effect = self._get_mock_open(unimplemented_config)
+
+        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+
+        # Running Stage 1 should raise ValueError due to unimplemented "entity" type
+        with self.assertRaises(ValueError) as ctx:
+            utils.execute_stage(stage_num=1, active_imports=["USFed_Census"])
+        self.assertIn("Unsupported or unimplemented aggregation step type: entity", str(ctx.exception))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator.py b/pipeline/workflow/ingestion-helper/aggregation/validator.py
index a2ac70a2d..c26427e8f 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator.py
@@ -57,9 +57,8 @@ def validate_config(config_file_path: str, schema_file_path: str) -> List[Dict[s
         logging.error(f"Failed to parse YAML file {config_file_path}: {e}")
         raise e
 
-    if not config or "aggregations" not in config:
-        logging.warning("Aggregation config is empty or missing 'aggregations' key.")
-        return []
+    if config is None:
+        config = {}
 
     # 2. Load JSON Schema
     try:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
index fd38a35cc..34978f58d 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -284,6 +284,26 @@ def test_validate_config_missing_schema_file(self):
             with self.assertRaises(FileNotFoundError):
                 validate_config("aggregation.yaml", "non_existent_schema.json")
 
+    @patch('builtins.open')
+    def test_validate_config_missing_aggregations_key(self, mock_file_open):
+        """Verifies that missing the required 'aggregations' root key raises ValidationError."""
+        missing_aggregations_yaml = """
+        some_other_key: []
+        """
+        mock_file_open.side_effect = self._get_mock_open(missing_aggregations_yaml)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("'aggregations' is a required property", ctx.exception.message)
+
+    @patch('builtins.open')
+    def test_validate_config_empty_file(self, mock_file_open):
+        """Verifies that a completely empty configuration file raises ValidationError."""
+        empty_yaml = ""
+        mock_file_open.side_effect = self._get_mock_open(empty_yaml)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config("aggregation.yaml", self.schema_path)
+        self.assertIn("'aggregations' is a required property", ctx.exception.message)
+
 
 if __name__ == '__main__':
     unittest.main()

From 1c011963b2c95b8c9e9f88bf7cccb957af286758 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 14:52:03 +0530
Subject: [PATCH 06/33] perf(aggregation): optimize stage execution and resolve
 sparse stage loop performance issue

- Added a new get_active_stages() helper in orchestrator.py that returns a sorted list of unique active and enabled stage numbers.
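- Replaced all sequential `while` and `range` loops in routes/aggregation.py with list comprehensions over get_active_stages(), so the routes jump directly to the next active stage instead of probing every stage number in between.
- This removes the performance spike and timeout risk when a very large stage number (e.g. 100 million) is configured, because the work now scales with the number of configured aggregations rather than with the largest stage value.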
- Added unit test 'test_get_active_stages' in orchestrator_test.py.
- Updated and significantly simplified integration test mocks in app_test.py to mock get_active_stages() instead of aggregations/has_stage.
---
 .../aggregation/orchestrator.py               | 20 ++++++++-
 .../aggregation/orchestrator_test.py          | 21 +++++++++
 .../workflow/ingestion-helper/app_test.py     | 10 ++---
 .../ingestion-helper/routes/aggregation.py    | 45 +++++++++----------
 4 files changed, 64 insertions(+), 32 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 0e7e0a133..de6b7e0da 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -165,9 +165,27 @@ def has_stage(self, stage_num: int, active_imports: List[str]) -> bool:
             # Check if it applies to any active imports
             if self._get_applicable_imports(config, active_imports):
                 return True
-                
+
         return False
 
+    def get_active_stages(self, active_imports: List[str]) -> List[int]:
+        """Returns a sorted list of unique, active, and enabled stage numbers.
+
+        Args:
+            active_imports: The list of active import names.
+
+        Returns:
+            A sorted list of unique active stage numbers.
+        """
+        stages = set()
+        for config in self.aggregations:
+            if config.get("disabled", False):
+                continue
+            # Check if it applies to any active imports
+            if self._get_applicable_imports(config, active_imports):
+                stages.add(config.get("stage", 1))
+        return sorted(list(stages))
+
     def check_jobs_status(self, job_ids: List[str]) -> Dict[str, Any]:
         """Checks the status of the specified BigQuery job IDs.
 
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 64a4eefe3..2f9720995 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -98,6 +98,27 @@ def test_has_stage(self, mock_file_open, *mocks):
         # Stage 3: Does not exist in config
         self.assertFalse(utils.has_stage(3, ["USFed_Census"]))
 
+    @patch('builtins.open')
+    def test_get_active_stages(self, mock_file_open, *mocks):
+        """Tests that get_active_stages correctly extracts, filters, and sorts active stages."""
+        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
+
+        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+
+        # 1. For active import "USFed_Census":
+        # Stage 1 (linked_edges, place) and Stage 2 (stat_var) have active steps.
+        # The place rollup in Stage 2 is disabled, but the stat_var step is enabled and active.
+        # Therefore, active stages should be [1, 2].
+        stages = utils.get_active_stages(active_imports=["USFed_Census"])
+        self.assertEqual(stages, [1, 2])
+
+        # 2. For active import "OtherImport":
+        # Stage 1 (linked_edges) matches via wildcard.
+        # Stage 2 (place rollup is disabled, stat_var does not match "OtherImport").
+        # Therefore, only Stage 1 is active. Active stages should be [1].
+        stages = utils.get_active_stages(active_imports=["OtherImport"])
+        self.assertEqual(stages, [1])
+
     @patch('builtins.open')
     def test_execute_stage_1(self, mock_file_open, 
                              mock_svg_gen, mock_prov_gen, mock_edge_gen, 
diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index f82003676..65da0ca73 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -268,7 +268,7 @@ def test_aggregation_initiate_success(self, mock_aggregation_utils):
         # Setup mock orchestrator
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
-        mock_instance.has_stage.side_effect = lambda stage, imports: stage == 1
+        mock_instance.get_active_stages.return_value = [1]
         mock_instance.execute_stage.return_value = ["job-1", "job-2"]
 
         # Call endpoint
@@ -290,12 +290,10 @@ def test_aggregation_poll_transition(self, mock_aggregation_utils):
         # Setup mock orchestrator to simulate Stage 1 completion and Stage 2 execution
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
-        mock_instance.aggregations = [{"stage": 1}, {"stage": 2}]
+        mock_instance.get_active_stages.return_value = [1, 2]
         
         # Mock BQ reporting Stage 1 jobs are DONE
         mock_instance.check_jobs_status.return_value = {"status": "DONE"}
-        # Mock Stage 2 existence and execution
-        mock_instance.has_stage.side_effect = lambda stage, imports: stage == 2
         mock_instance.execute_stage.return_value = ["job-stage2-1"]
 
         # Input state (Stage 1 completed)
@@ -321,6 +319,7 @@ def test_aggregation_poll_still_running(self, mock_aggregation_utils):
         # Setup mock orchestrator to simulate jobs still in PENDING state
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
+        mock_instance.get_active_stages.return_value = [1]
         
         # Mock BQ reporting Stage 1 jobs are PENDING (still executing)
         mock_instance.check_jobs_status.return_value = {"status": "PENDING"}
@@ -349,8 +348,7 @@ def test_aggregation_legacy_run(self, mock_aggregation_utils):
         # Setup mock orchestrator
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
-        mock_instance.aggregations = [{"stage": 1}, {"stage": 2}]
-        mock_instance.has_stage.side_effect = lambda stage, imports: stage in [1, 2]
+        mock_instance.get_active_stages.return_value = [1, 2]
         mock_instance.execute_stage.side_effect = lambda stage, imports: [f"job-stage{stage}-1"]
 
         # Call legacy endpoint
diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index f28204df9..5860e6235 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -92,17 +92,13 @@ def initiate_aggregation(req: InitiateRequest):
         orchestrator = _get_orchestrator()
         import_names = [item.get('importName') for item in req.importList if item.get('importName')]
         
-        # Find the first stage that has active aggregations (usually Stage 1)
-        first_stage = 1
-        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
-        while first_stage <= max_stage:
-            if orchestrator.has_stage(first_stage, import_names):
-                break
-            first_stage += 1
-        else:
+        active_stages = orchestrator.get_active_stages(import_names)
+        if not active_stages:
             logging.info("No stages have active aggregations for the current imports. Completing immediately.")
             return StateObject(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=req.importList)
 
+        first_stage = active_stages[0]
+
         logging.info(f"Initiating aggregation at Stage {first_stage}")
         job_ids = orchestrator.execute_stage(first_stage, import_names)
         
@@ -152,19 +148,19 @@ def poll_aggregation(state: StateObject):
             return state # Return unchanged
             
         # Case C: All jobs succeeded -> Find and execute the next active stage
-        next_stage = state.current_stage + 1
-        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
-        while next_stage <= max_stage:
-            if orchestrator.has_stage(next_stage, import_names):
-                logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
-                new_job_ids = orchestrator.execute_stage(next_stage, import_names)
-                return StateObject(
-                    status="RUNNING",
-                    current_stage=next_stage,
-                    active_job_ids=new_job_ids,
-                    import_list=state.import_list
-                )
-            next_stage += 1
+        active_stages = orchestrator.get_active_stages(import_names)
+        next_stages = [s for s in active_stages if s > state.current_stage]
+        
+        if next_stages:
+            next_stage = next_stages[0]
+            logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
+            new_job_ids = orchestrator.execute_stage(next_stage, import_names)
+            return StateObject(
+                status="RUNNING",
+                current_stage=next_stage,
+                active_job_ids=new_job_ids,
+                import_list=state.import_list
+            )
             
         # If we exit the loop, there are no more active stages left
         logging.info("All aggregation stages completed successfully!")
@@ -203,10 +199,9 @@ def run_aggregation_legacy(req: LegacyAggregationRequest):
         
         # Compatibility Mode: Submit ALL enabled stages in parallel
         job_ids = []
-        max_stage = max((cfg.get("stage", 1) for cfg in orchestrator.aggregations), default=1)
-        for stage_num in range(1, max_stage + 1):
-            if orchestrator.has_stage(stage_num, import_names):
-                job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
+        active_stages = orchestrator.get_active_stages(import_names)
+        for stage_num in active_stages:
+            job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
                 
         return LegacyAggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
     except Exception as e:

From 92ae624f0e430ae112e220c834733ded7cc6994e Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 15:00:25 +0530
Subject: [PATCH 07/33] refactor(aggregation): rename StateObject to
 AggregationWorkflowState and add docstring

- Renamed the generic StateObject class to AggregationWorkflowState to better reflect its purpose in representing the state of the multi-stage aggregation workflow.
- Added a comprehensive, professional docstring to AggregationWorkflowState explaining its role in the stateless polling loop coordinated by Google Cloud Workflows.
- Updated all type annotations, route definitions, and return statements inside routes/aggregation.py.
- Verified that no other files in the workspace reference the old class name directly, and that all 30 tests continue to pass.
---
 .../ingestion-helper/routes/aggregation.py    | 28 +++++++++++--------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index 5860e6235..60c254e86 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -26,7 +26,13 @@
 # Pydantic Models for the New Stateless API
 # =============================================================================
 
-class StateObject(BaseModel):
+class AggregationWorkflowState(BaseModel):
+    """Represents the execution state of a multi-stage aggregation pipeline run.
+
+    This state object is passed back and forth between the client (Google Cloud
+    Workflows) and the helper service endpoints to durably maintain the progress
+    of a stateless, sequential aggregation run across multiple stages.
+    """
     status: str = Field(..., description="Overall status of the run: RUNNING, SUCCEEDED, FAILED")
     current_stage: int = Field(..., description="The stage currently executing")
     active_job_ids: List[str] = Field(default_factory=list, description="BQ job IDs running in the current stage")
@@ -80,13 +86,13 @@ def _get_orchestrator() -> AggregationOrchestrator:
 # New Stateless API Endpoints (Stage-based)
 # -----------------------------------------------------------------------------
 
-@router.post("/initiate", response_model=StateObject)
+@router.post("/initiate", response_model=AggregationWorkflowState)
 @log_start
 def initiate_aggregation(req: InitiateRequest):
     """Initiates the aggregation run by executing Stage 1 and returning the initial state."""
     if not req.importList:
         logging.info("Empty import list. Skipping aggregation.")
-        return StateObject(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=[])
+        return AggregationWorkflowState(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=[])
 
     try:
         orchestrator = _get_orchestrator()
@@ -95,14 +101,14 @@ def initiate_aggregation(req: InitiateRequest):
         active_stages = orchestrator.get_active_stages(import_names)
         if not active_stages:
             logging.info("No stages have active aggregations for the current imports. Completing immediately.")
-            return StateObject(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=req.importList)
+            return AggregationWorkflowState(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=req.importList)
 
         first_stage = active_stages[0]
 
         logging.info(f"Initiating aggregation at Stage {first_stage}")
         job_ids = orchestrator.execute_stage(first_stage, import_names)
         
-        return StateObject(
+        return AggregationWorkflowState(
             status="RUNNING",
             current_stage=first_stage,
             active_job_ids=job_ids,
@@ -113,9 +119,9 @@ def initiate_aggregation(req: InitiateRequest):
         raise HTTPException(status_code=500, detail=f"Failed to initiate aggregation: {str(e)}")
 
 
-@router.post("/poll", response_model=StateObject)
+@router.post("/poll", response_model=AggregationWorkflowState)
 @log_start
-def poll_aggregation(state: StateObject):
+def poll_aggregation(state: AggregationWorkflowState):
     """Checks progress of active jobs and transitions to the next stage if complete."""
     if state.status != "RUNNING":
         return state # Already in a terminal state
@@ -134,7 +140,7 @@ def poll_aggregation(state: StateObject):
         # Case A: Any job failed
         if bq_status["status"] == "FAILED":
             logging.error(f"Stage {state.current_stage} failed with error: {bq_status.get('error')}")
-            return StateObject(
+            return AggregationWorkflowState(
                 status="FAILED",
                 current_stage=state.current_stage,
                 active_job_ids=[],
@@ -155,7 +161,7 @@ def poll_aggregation(state: StateObject):
             next_stage = next_stages[0]
             logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
             new_job_ids = orchestrator.execute_stage(next_stage, import_names)
-            return StateObject(
+            return AggregationWorkflowState(
                 status="RUNNING",
                 current_stage=next_stage,
                 active_job_ids=new_job_ids,
@@ -164,7 +170,7 @@ def poll_aggregation(state: StateObject):
             
         # If we exit the loop, there are no more active stages left
         logging.info("All aggregation stages completed successfully!")
-        return StateObject(
+        return AggregationWorkflowState(
             status="SUCCEEDED",
             current_stage=state.current_stage,
             active_job_ids=[],
@@ -173,7 +179,7 @@ def poll_aggregation(state: StateObject):
             
     except Exception as e:
         logging.error(f"Error during polling: {e}")
-        return StateObject(
+        return AggregationWorkflowState(
             status="FAILED",
             current_stage=state.current_stage,
             active_job_ids=[],

From 0aa172ad0f079eef6a2e908557b6af269c7b53a2 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 15:13:18 +0530
Subject: [PATCH 08/33] refactor(aggregation): revert legacy prefixes to
 original names with temporary TODOs

- Reverted all 'Legacy' prefix names on compatibility Pydantic models (e.g. AggregationRequest, AggregationResponse) back to their original names, ensuring perfect backward compatibility for client-side code generators.
- Reverted compatibility route method names back to their original names (run_aggregation and get_aggregation_status).
- Added deprecated=True to the FastAPI route decorators for /run and /status to natively flag them in the OpenAPI/Swagger documentation UI.
- Added clear TODO comments and docstrings advising that these are temporary compatibility components to be removed once all consumers migrate.
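- Renamed the test case in app_test.py to test_aggregation_run to match the method name.
- Changed the /status handler to read the failed job IDs from the 'failed_jobs' key of the orchestrator's status result (previously 'failedJobs').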
---
 .../workflow/ingestion-helper/app_test.py     |  2 +-
 .../ingestion-helper/routes/aggregation.py    | 56 +++++++++++--------
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index 65da0ca73..7a1a464cc 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -344,7 +344,7 @@ def test_aggregation_poll_still_running(self, mock_aggregation_utils):
         self.assertEqual(state["active_job_ids"], ["job-1", "job-2"])
 
     @patch('routes.aggregation._get_orchestrator')
-    def test_aggregation_legacy_run(self, mock_aggregation_utils):
+    def test_aggregation_run(self, mock_aggregation_utils):
         # Setup mock orchestrator
         mock_instance = MagicMock()
         mock_aggregation_utils.return_value = mock_instance
diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index 60c254e86..d08283124 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -43,19 +43,24 @@ class InitiateRequest(BaseModel):
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
 # =============================================================================
-# Pydantic Models for the Legacy API (Backward Compatibility)
+# Pydantic Models for Backward Compatibility (Temporary)
+# TODO: Remove these models once all consumers migrate to /initiate and /poll
 # =============================================================================
 
-class LegacyAggregationRequest(BaseModel):
+class AggregationRequest(BaseModel):
+    """Temporary request model for compatibility run endpoint."""
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
-class LegacyAggregationStatusRequest(BaseModel):
+class AggregationStatusRequest(BaseModel):
+    """Temporary request model for compatibility status endpoint."""
     jobIds: List[str] = Field(default_factory=list)
 
-class LegacyAggregationResponse(BaseResponse):
+class AggregationResponse(BaseResponse):
+    """Temporary response model for compatibility run endpoint."""
     jobIds: List[str] = Field(default_factory=list, description="BigQuery job IDs submitted for async aggregation")
 
-class LegacyAggregationStatusResponse(BaseResponse):
+class AggregationStatusResponse(BaseResponse):
+    """Temporary response model for compatibility status endpoint."""
     error: Optional[str] = Field(default=None, description="Detailed error message if failed")
     failedJobs: Optional[List[str]] = Field(default_factory=list, description="List of failed BigQuery job IDs")
 
@@ -188,16 +193,20 @@ def poll_aggregation(state: AggregationWorkflowState):
         )
 
 # -----------------------------------------------------------------------------
-# Legacy API Endpoints (Backward Compatibility Mode)
+# API Endpoints for Backward Compatibility (Temporary)
+# TODO: Remove these endpoints once all consumers migrate to /initiate and /poll
 # -----------------------------------------------------------------------------
 
-@router.post("/run", response_model=LegacyAggregationResponse)
+@router.post("/run", response_model=AggregationResponse, deprecated=True)
 @log_start
-def run_aggregation_legacy(req: LegacyAggregationRequest):
-    """Legacy endpoint. Runs ALL enabled aggregations in parallel (ignores stages)."""
+def run_aggregation(req: AggregationRequest):
+    """Temporary endpoint. Runs ALL enabled aggregations in parallel (ignores stages).
+
+    Please migrate to /initiate and /poll endpoints.
+    """
     if not req.importList:
-        logging.info("Empty import list. Skipping legacy aggregation.")
-        return LegacyAggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
+        logging.info("Empty import list. Skipping temporary aggregation.")
+        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
         
     try:
         orchestrator = _get_orchestrator()
@@ -209,28 +218,31 @@ def run_aggregation_legacy(req: LegacyAggregationRequest):
         for stage_num in active_stages:
             job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
                 
-        return LegacyAggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
+        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
     except Exception as e:
-        logging.error(f"Legacy aggregation failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Legacy aggregation failed: {str(e)}")
+        logging.error(f"Temporary aggregation failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Temporary aggregation failed: {str(e)}")
 
 
-@router.post("/status", response_model=LegacyAggregationStatusResponse)
+@router.post("/status", response_model=AggregationStatusResponse, deprecated=True)
 @log_start
-def check_aggregation_status_legacy(req: LegacyAggregationStatusRequest):
-    """Legacy endpoint. Checks the status of the submitted BigQuery jobs."""
+def get_aggregation_status(req: AggregationStatusRequest):
+    """Temporary endpoint. Checks the status of the submitted BigQuery jobs.
+
+    Please migrate to /initiate and /poll endpoints.
+    """
     if not req.jobIds:
         logging.info("Empty jobIds. Returning status DONE.")
-        return LegacyAggregationStatusResponse(status=ResponseStatus.DONE)
+        return AggregationStatusResponse(status=ResponseStatus.DONE)
         
     try:
         orchestrator = _get_orchestrator()
         status_info = orchestrator.check_jobs_status(req.jobIds)
-        return LegacyAggregationStatusResponse(
+        return AggregationStatusResponse(
             status=ResponseStatus.from_str(status_info.get("status", "ERROR")),
             error=status_info.get("error"),
-            failedJobs=status_info.get("failedJobs", [])
+            failedJobs=status_info.get("failed_jobs", [])
         )
     except Exception as e:
-        logging.error(f"Legacy status check failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Legacy status check failed: {str(e)}")
+        logging.error(f"Temporary check status failed: {e}")
+        raise HTTPException(status_code=500, detail=f"Temporary check status failed: {str(e)}")

From 8562747a528af15c0b024ea9754fcffb3c1093a2 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 15:15:48 +0530
Subject: [PATCH 09/33] style(aggregation): remove redundant decorative comment
 headers

- Deleted redundant ASCII box section dividers and decorative headers (e.g. Pydantic Models for the New Stateless API, Router Definition) from routes/aggregation.py.
- Simplified backward compatibility section markers to clean, single-line TODO comments.
- This removes visual noise and brings the file in line with idiomatic Python commenting conventions.
---
 .../ingestion-helper/routes/aggregation.py     | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index d08283124..0f9d5478b 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -22,9 +22,7 @@
 from aggregation import AggregationOrchestrator
 from utils.logging import log_start
 
-# =============================================================================
-# Pydantic Models for the New Stateless API
-# =============================================================================
+
 
 class AggregationWorkflowState(BaseModel):
     """Represents the execution state of a multi-stage aggregation pipeline run.
@@ -42,10 +40,7 @@ class AggregationWorkflowState(BaseModel):
 class InitiateRequest(BaseModel):
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
-# =============================================================================
-# Pydantic Models for Backward Compatibility (Temporary)
 # TODO: Remove these models once all consumers migrate to /initiate and /poll
-# =============================================================================
 
 class AggregationRequest(BaseModel):
     """Temporary request model for compatibility run endpoint."""
@@ -64,9 +59,7 @@ class AggregationStatusResponse(BaseResponse):
     error: Optional[str] = Field(default=None, description="Detailed error message if failed")
     failedJobs: Optional[List[str]] = Field(default_factory=list, description="List of failed BigQuery job IDs")
 
-# =============================================================================
-# Router Definition
-# =============================================================================
+
 
 router = APIRouter(prefix="/aggregation", tags=["aggregation"])
 
@@ -87,9 +80,7 @@ def _get_orchestrator() -> AggregationOrchestrator:
         is_base_dc=config.IS_BASE_DC,
     )
 
-# -----------------------------------------------------------------------------
-# New Stateless API Endpoints (Stage-based)
-# -----------------------------------------------------------------------------
+
 
 @router.post("/initiate", response_model=AggregationWorkflowState)
 @log_start
@@ -192,10 +183,7 @@ def poll_aggregation(state: AggregationWorkflowState):
             error=f"Orchestrator error: {str(e)}"
         )
 
-# -----------------------------------------------------------------------------
-# API Endpoints for Backward Compatibility (Temporary)
 # TODO: Remove these endpoints once all consumers migrate to /initiate and /poll
-# -----------------------------------------------------------------------------
 
 @router.post("/run", response_model=AggregationResponse, deprecated=True)
 @log_start

From f02540bf1c96a1c7b5cae47027f38cff271db5a6 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 15:23:24 +0530
Subject: [PATCH 10/33] refactor(aggregation): localize TODO comments for
 temporary models and endpoints

- Replaced the global section-level TODO comments in routes/aggregation.py with localized, specific TODO comments.
- Placed an explicit, actionable TODO comment directly above each of the four compatibility models (AggregationRequest, AggregationStatusRequest, etc.).
- Placed an explicit TODO comment directly above the /run and /status route decorators.
- This ensures technical debt is highly visible, actionable, and tied directly to the specific components slated for removal after consumer migration.
---
 .../workflow/ingestion-helper/routes/aggregation.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index 0f9d5478b..f15e8a4e1 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -40,20 +40,25 @@ class AggregationWorkflowState(BaseModel):
 class InitiateRequest(BaseModel):
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
-# TODO: Remove these models once all consumers migrate to /initiate and /poll
-
+# TODO: Remove AggregationRequest once all consumers migrate to /initiate and /poll
 class AggregationRequest(BaseModel):
     """Temporary request model for compatibility run endpoint."""
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
+
+# TODO: Remove AggregationStatusRequest once all consumers migrate to /initiate and /poll
 class AggregationStatusRequest(BaseModel):
     """Temporary request model for compatibility status endpoint."""
     jobIds: List[str] = Field(default_factory=list)
 
+
+# TODO: Remove AggregationResponse once all consumers migrate to /initiate and /poll
 class AggregationResponse(BaseResponse):
     """Temporary response model for compatibility run endpoint."""
     jobIds: List[str] = Field(default_factory=list, description="BigQuery job IDs submitted for async aggregation")
 
+
+# TODO: Remove AggregationStatusResponse once all consumers migrate to /initiate and /poll
 class AggregationStatusResponse(BaseResponse):
     """Temporary response model for compatibility status endpoint."""
     error: Optional[str] = Field(default=None, description="Detailed error message if failed")
@@ -183,8 +188,7 @@ def poll_aggregation(state: AggregationWorkflowState):
             error=f"Orchestrator error: {str(e)}"
         )
 
-# TODO: Remove these endpoints once all consumers migrate to /initiate and /poll
-
+# TODO: Remove the /run endpoint once all consumers migrate to /initiate and /poll
 @router.post("/run", response_model=AggregationResponse, deprecated=True)
 @log_start
 def run_aggregation(req: AggregationRequest):
@@ -212,6 +216,7 @@ def run_aggregation(req: AggregationRequest):
         raise HTTPException(status_code=500, detail=f"Temporary aggregation failed: {str(e)}")
 
 
+# TODO: Remove the /status endpoint once all consumers migrate to /initiate and /poll
 @router.post("/status", response_model=AggregationStatusResponse, deprecated=True)
 @log_start
 def get_aggregation_status(req: AggregationStatusRequest):

From 41494ef47dd684f159f20f05dc7fbc6ff16a4208 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 15:26:28 +0530
Subject: [PATCH 11/33] refactor(aggregation): fix PEP 8 formatting and clean
 up unused imports

- Fixed a PEP 8 E701 violation in orchestrator.py by splitting a single-line 'if' statement into a standard multi-line block.
- Resolved a Ruff F401 unused import warning in app_test.py by removing 'import os'.
- Verified that all 30 tests continue to pass after the style cleanup.
---
 pipeline/workflow/ingestion-helper/aggregation/orchestrator.py | 3 ++-
 pipeline/workflow/ingestion-helper/app_test.py                 | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index de6b7e0da..de9dcc597 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -115,7 +115,8 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
                     destination_type=config["destination_type"],
                     allow_multiple_to_places=config.get("allow_multiple_to_places", False)
                 )
-                if job: step_jobs.append(job)
+                if job:
+                    step_jobs.append(job)
 
             elif step_type == "stat_var":
                 sv_jobs = self.stat_var_aggregator.aggregate_stat_vars(
diff --git a/pipeline/workflow/ingestion-helper/app_test.py b/pipeline/workflow/ingestion-helper/app_test.py
index 7a1a464cc..acf0b7796 100644
--- a/pipeline/workflow/ingestion-helper/app_test.py
+++ b/pipeline/workflow/ingestion-helper/app_test.py
@@ -15,7 +15,6 @@
 import unittest
 from unittest.mock import MagicMock, patch
 from datetime import datetime
-import os
 
 from fastapi.testclient import TestClient
 from app import app

From a753fd7a922354c3c4ca285db51cb9fe4da182a5 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 16:17:51 +0530
Subject: [PATCH 12/33] refactor(aggregation): improve orchestrator design,
 logging, and layout

---
 .../aggregation/orchestrator.py               | 117 ++++++++++++------
 1 file changed, 77 insertions(+), 40 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index de9dcc597..cfb931125 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -57,12 +57,7 @@ def __init__(self,
                                          location=location,
                                          run_sequential=False)
 
-        # Initialize all generators
-        self.place_generator = PlaceAggregationGenerator(self.executor, is_base_dc)
-        self.stat_var_aggregator = StatVarAggregator(self.executor, is_base_dc)
-        self.linked_edge_generator = LinkedEdgeGenerator(self.executor, is_base_dc)
-        self.provenance_summary_generator = ProvenanceSummaryGenerator(self.executor, is_base_dc)
-        self.stat_var_group_generator = StatVarGroupGenerator(self.executor, is_base_dc)
+        self.is_base_dc = is_base_dc
 
         # Resolve paths for default config and schema
         curr_dir = os.path.dirname(os.path.abspath(__file__))
@@ -73,8 +68,6 @@ def __init__(self,
         # Load and validate configuration
         self.aggregations = validate_config(config_file_path, schema_file_path)
 
-
-
     def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
         """Executes all enabled aggregations in the specified stage in parallel.
 
@@ -85,7 +78,8 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
         Returns:
             A list of BigQuery job IDs submitted for this stage.
         """
-        logging.info(f"Executing Aggregation Stage {stage_num} for active imports: {active_imports}")
+        logging.info(f"=== Starting Aggregation Orchestration for Stage {stage_num} ===")
+        logging.info(f"Active imports in this run: {active_imports}")
         jobs = []
 
         for config in self.aggregations:
@@ -100,42 +94,23 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
             # 3. Filter by active imports
             applicable_imports = self._get_applicable_imports(config, active_imports)
             if not applicable_imports:
-                logging.info(f"Skipping step '{config['type']}' in Stage {stage_num}: no matching active imports.")
                 continue
 
-            # 4. Route to correct generator
+            # 4. Route to correct generator helper
             step_type = config["type"]
-            logging.info(f"Submitting step '{step_type}' in Stage {stage_num} for imports: {applicable_imports}")
+            logging.info(f"Triggering step '{step_type}' in Stage {stage_num}...")
             
             step_jobs = []
             if step_type == "place":
-                job = self.place_generator.aggregate_places(
-                    import_names=applicable_imports,
-                    source_type=config["source_type"],
-                    destination_type=config["destination_type"],
-                    allow_multiple_to_places=config.get("allow_multiple_to_places", False)
-                )
-                if job:
-                    step_jobs.append(job)
-
+                step_jobs = self._trigger_place(config, applicable_imports)
             elif step_type == "stat_var":
-                sv_jobs = self.stat_var_aggregator.aggregate_stat_vars(
-                    ancestor_sv=config["ancestor_sv_id"],
-                    source_svs=config["source_sv_ids"],
-                    import_names=applicable_imports,
-                    output_import_name=config.get("output_import_name"),
-                    skip_all_sources_present_check=config.get("skip_all_sources_present_check", False)
-                )
-                step_jobs.extend(sv_jobs)
-
+                step_jobs = self._trigger_stat_var(config, applicable_imports)
             elif step_type == "linked_edges":
-                step_jobs.extend(self.linked_edge_generator.run_all(applicable_imports))
-
+                step_jobs = self._trigger_linked_edges(config, applicable_imports)
             elif step_type == "provenance_summary":
-                step_jobs.extend(self.provenance_summary_generator.run_all(applicable_imports))
-
+                step_jobs = self._trigger_provenance_summary(config, applicable_imports)
             elif step_type == "stat_var_groups":
-                step_jobs.extend(self.stat_var_group_generator.run_all(applicable_imports))
+                step_jobs = self._trigger_stat_var_groups(config, applicable_imports)
             else:
                 raise ValueError(f"Unsupported or unimplemented aggregation step type: {step_type}")
 
@@ -144,7 +119,7 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
                 if job and job.job_id:
                     jobs.append(job.job_id)
 
-        logging.info(f"Submitted {len(jobs)} jobs in Stage {stage_num}: {jobs}")
+        logging.info(f"=== Stage {stage_num} initiated successfully. Submitted {len(jobs)} BigQuery jobs: {jobs} ===")
         return jobs
 
     def has_stage(self, stage_num: int, active_imports: List[str]) -> bool:
@@ -180,12 +155,24 @@ def get_active_stages(self, active_imports: List[str]) -> List[int]:
         """
         stages = set()
         for config in self.aggregations:
+            step_type = config.get("type")
+            stage_num = config.get("stage", 1)
+
             if config.get("disabled", False):
+                logging.info(f"[Config Scan] Skipping step '{step_type}' in Stage {stage_num} because it is disabled.")
                 continue
-            # Check if it applies to any active imports
-            if self._get_applicable_imports(config, active_imports):
-                stages.add(config.get("stage", 1))
-        return sorted(list(stages))
+
+            applicable_imports = self._get_applicable_imports(config, active_imports)
+            if not applicable_imports:
+                logging.info(f"[Config Scan] Skipping step '{step_type}' in Stage {stage_num} because it does not apply to active imports: {active_imports}.")
+                continue
+
+            logging.info(f"[Config Scan] Step '{step_type}' in Stage {stage_num} is ACTIVE for imports: {applicable_imports}.")
+            stages.add(stage_num)
+        
+        sorted_stages = sorted(list(stages))
+        logging.info(f"[Config Scan] Active stages resolved: {sorted_stages}")
+        return sorted_stages
 
     def check_jobs_status(self, job_ids: List[str]) -> Dict[str, Any]:
         """Checks the status of the specified BigQuery job IDs.
@@ -198,6 +185,56 @@ def check_jobs_status(self, job_ids: List[str]) -> Dict[str, Any]:
             logging.error(f"Failed to check jobs status: {e}")
             raise e
 
+    def _trigger_place(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers place-level rollup aggregations."""
+        source_type = config["source_type"]
+        destination_type = config["destination_type"]
+        logging.info(
+            f"  -> Place Rollup: {source_type} -> {destination_type} for imports {applicable_imports}"
+        )
+        generator = PlaceAggregationGenerator(self.executor, self.is_base_dc)
+        job = generator.aggregate_places(
+            import_names=applicable_imports,
+            source_type=source_type,
+            destination_type=destination_type,
+            allow_multiple_to_places=config.get("allow_multiple_to_places", False)
+        )
+        return [job] if job else []
+
+    def _trigger_stat_var(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers statistical variable aggregations."""
+        ancestor_sv = config["ancestor_sv_id"]
+        source_svs = config["source_sv_ids"]
+        logging.info(
+            f"  -> Stat Var Aggregation: ancestor '{ancestor_sv}' (sources: {source_svs}) for imports {applicable_imports}"
+        )
+        generator = StatVarAggregator(self.executor, self.is_base_dc)
+        return generator.aggregate_stat_vars(
+            ancestor_sv=ancestor_sv,
+            source_svs=source_svs,
+            import_names=applicable_imports,
+            output_import_name=config.get("output_import_name"),
+            skip_all_sources_present_check=config.get("skip_all_sources_present_check", False)
+        )
+
+    def _trigger_linked_edges(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers linked edge aggregations."""
+        logging.info(f"  -> Linked Edges Aggregation for imports {applicable_imports}")
+        generator = LinkedEdgeGenerator(self.executor, self.is_base_dc)
+        return generator.run_all(applicable_imports)
+
+    def _trigger_provenance_summary(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers provenance summary aggregations."""
+        logging.info(f"  -> Provenance Summary Aggregation for imports {applicable_imports}")
+        generator = ProvenanceSummaryGenerator(self.executor, self.is_base_dc)
+        return generator.run_all(applicable_imports)
+
+    def _trigger_stat_var_groups(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers statistical variable group aggregations."""
+        logging.info(f"  -> Stat Var Groups Aggregation for imports {applicable_imports}")
+        generator = StatVarGroupGenerator(self.executor, self.is_base_dc)
+        return generator.run_all(applicable_imports)
+
     def _get_applicable_imports(self, config: Dict[str, Any], active_imports: List[str]) -> List[str]:
         """Determines which active imports apply to this aggregation config."""
         configured_imports = config["imports"]

From dec90244b5768803c622175dc22d6b5fdb61871e Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 23:07:16 +0530
Subject: [PATCH 13/33] docs(aggregation): add module user guide README

---
 .../ingestion-helper/aggregation/README.md    | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/README.md

diff --git a/pipeline/workflow/ingestion-helper/aggregation/README.md b/pipeline/workflow/ingestion-helper/aggregation/README.md
new file mode 100644
index 000000000..ec61e5a40
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/README.md
@@ -0,0 +1,147 @@
+# Aggregations
+
+This module orchestrates the execution of Data Commons aggregations through BigQuery Federation. The aggregations include place rollups, statistical variable aggregations, linked edges, and metadata summaries.
+
+## Core Concepts
+
+*   **Sequential Stages**: Aggregations are executed sequentially by their `stage` number (e.g., Stage 1 steps are guaranteed to complete before Stage 2 steps begin). This is useful when later steps depend on the output of earlier ones.
+*   **Parallel Execution**: All aggregation steps configured in the same stage are executed in parallel to maximize performance.
+
+---
+
+## Configuration Guide (`aggregation.yaml`)
+
+The entire aggregation pipeline is configured via `aggregation.yaml`. This file defines which aggregations run, which imports each one applies to, and the order (by `stage`) in which they execute.
+
+### Common Configuration Fields
+Every step in the configuration supports these common fields:
+*   `type` (string, Required): The type of aggregation step to run.
+*   `stage` (integer, Optional, default: 1): The sequential stage number. Steps in lower stages are guaranteed to finish before higher stages start.
+*   `imports` (list of strings, Required): The list of import names this step applies to. Use `["*"]` (wildcard) to apply the step to **all** imports in the current run.
+*   `disabled` (boolean, Optional, default: false): Set to `true` to temporarily disable a step without deleting it.
+
+---
+
+### Supported Aggregation Types
+
+#### 1. Place (`place`)
+Aggregates and rolls up statistical data from a smaller place type (source) to a larger place type (destination).
+*   **Fields**:
+    *   `source_type` (string, Required): The source place type (e.g., `County`).
+    *   `destination_type` (string, Required): The destination place type (e.g., `State`).
+    *   `allow_multiple_to_places` (boolean, Optional, default: false): Allows mapping to multiple parent places if true.
+*   **Example**:
+    ```yaml
+    - type: place
+      stage: 1
+      imports: ["USFed_Census"]
+      source_type: County
+      destination_type: State
+    ```
+
+#### 2. Statistical Variable Aggregation (`stat_var`)
+Aggregates raw statistical variables into a summarized ancestor variable (e.g., summing up individual age group counts to get a total population count).
+*   **Fields**:
+    *   `ancestor_sv_id` (string, Required): The ID of the parent/summary statistical variable (e.g., `Count_Person`).
+    *   `source_sv_ids` (list of strings, Required): The list of individual statistical variables to sum up.
+    *   `output_import_name` (string, Optional): Custom import name to write output under.
+    *   `skip_all_sources_present_check` (boolean, Optional, default: false): If true, aggregates even if some source variables are missing.
+*   **Example**:
+    ```yaml
+    - type: stat_var
+      stage: 2
+      imports: ["USFed_Census"]
+      ancestor_sv_id: Count_Person
+      source_sv_ids:
+        - Count_Person_Male
+        - Count_Person_Female
+    ```
+
+#### 3. Linked Edges (`linked_edges`)
+Constructs and aggregates structural graph links (edges) between nodes in the Data Commons graph.
+*   **Example**:
+    ```yaml
+    - type: linked_edges
+      stage: 1
+      imports: ["*"] # Runs for all imports
+    ```
+
+#### 4. Provenance Summary (`provenance_summary`)
+Generates metadata and provenance summaries for all aggregated statistical observations, establishing data lineage.
+*   **Example**:
+    ```yaml
+    - type: provenance_summary
+      stage: 3
+      imports: ["USFed_Census"]
+    ```
+
+#### 5. Statistical Variable Groups (`stat_var_groups`)
+Aggregates and structures statistical variables into hierarchical groups for display in the Data Commons UI.
+*   **Example**:
+    ```yaml
+    - type: stat_var_groups
+      stage: 3
+      imports: ["*"]
+    ```
+
+---
+
+### Example `aggregation.yaml`
+
+This example demonstrates a typical multi-stage aggregation workflow.
+
+```yaml
+# aggregation.yaml
+aggregations:
+  # Stage 1: Parallel Place Rollups and Linked Edges
+  - type: linked_edges
+    stage: 1
+    imports: ["*"]
+
+  - type: place
+    stage: 1
+    imports: ["USFed_Census"]
+    source_type: County
+    destination_type: State
+
+  # Stage 2: Parallel Stat Var Aggregations (Depends on Stage 1 completing)
+  - type: stat_var
+    stage: 2
+    imports: ["USFed_Census"]
+    ancestor_sv_id: Count_Person
+    source_sv_ids:
+      - Count_Person_Male
+      - Count_Person_Female
+
+  # Stage 3: Metadata and UI Summaries (Depends on Stage 2 completing)
+  - type: provenance_summary
+    stage: 3
+    imports: ["USFed_Census"]
+
+  - type: stat_var_groups
+    stage: 3
+    imports: ["*"]
+```
+
+---
+
+## Local Configuration Validation
+
+The orchestrator validates the `aggregation.yaml` file on startup against a strict JSON Schema (`schema.json`). If there is a syntax error, a type mismatch, or a missing required field, the service will fail to start.
+
+### Running the Validator Locally
+You can validate your `aggregation.yaml` file locally using the built-in CLI tool before committing or deploying changes.
+
+1.  **Navigate to the ingestion-helper root**:
+    ```bash
+    cd pipeline/workflow/ingestion-helper
+    ```
+2.  **Run the validator**:
+    ```bash
+    python3 -m aggregation.validator --config ../aggregation.yaml
+
+    # sample output...
+    # Validating 'aggregation.yaml' against 'schema.json'...
+    # [SUCCESS] Configuration is valid!
+    # Parsed 5 aggregation steps successfully.
+    ```

From fb4be20f420cb26505eb6bfd12155d29a341b59c Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 23:32:51 +0530
Subject: [PATCH 14/33] test(aggregation): refactor validator and orchestrator
 test suites

---
 .../aggregation/orchestrator_test.py          | 247 ++++----
 .../aggregation/validator_test.py             | 567 +++++++++---------
 2 files changed, 416 insertions(+), 398 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 2f9720995..435d5a658 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -12,121 +12,122 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Unit tests for the AggregationOrchestrator class using real temporary files."""
+
 import json
 import os
 import sys
+import tempfile
+import textwrap
 import unittest
-from unittest.mock import MagicMock, patch, mock_open
+from unittest.mock import MagicMock, patch
 
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 
 from aggregation import AggregationOrchestrator
 
-
-# Sample valid YAML config for testing
-VALID_CONFIG_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: ["*"]
-    stage: 1
-
-  - type: place
-    source_type: County
-    destination_type: State
-    allow_multiple_to_places: false
-    imports: ["USFed_Census"]
-    stage: 1
-
-  - type: place
-    source_type: State
-    destination_type: Country
-    imports: ["*"]
-    stage: 2
-    disabled: true
-
-  - type: stat_var
-    ancestor_sv_id: Count_Person
-    source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
-    skip_all_sources_present_check: true
-    imports: ["USFed_Census"]
-    stage: 2
-"""
+VALID_CONFIG_YAML = textwrap.dedent("""\
+    aggregations:
+      - type: linked_edges
+        imports: ["*"]
+        stage: 1
+
+      - type: place
+        source_type: County
+        destination_type: State
+        allow_multiple_to_places: false
+        imports: ["USFed_Census"]
+        stage: 1
+
+      - type: place
+        source_type: State
+        destination_type: Country
+        imports: ["*"]
+        stage: 2
+        disabled: true
+
+      - type: stat_var
+        ancestor_sv_id: Count_Person
+        source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
+        skip_all_sources_present_check: true
+        imports: ["USFed_Census"]
+        stage: 2
+""")
 
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
-@patch('aggregation.orchestrator.PlaceAggregationGenerator')
-@patch('aggregation.orchestrator.StatVarAggregator')
-@patch('aggregation.orchestrator.LinkedEdgeGenerator')
-@patch('aggregation.orchestrator.ProvenanceSummaryGenerator')
-@patch('aggregation.orchestrator.StatVarGroupGenerator')
-class TestAggregationOrchestrator(unittest.TestCase):
+class TestOrchestratorScanning(unittest.TestCase):
+    """Tests the stage scanning and active stage resolution methods."""
 
     def setUp(self):
-        # Load the actual schema for validation tests (now inside aggregation/)
-        schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
-        with open(schema_path, "r") as f:
-            self.schema_json = json.load(f)
+        self.tmpdir = tempfile.TemporaryDirectory()
+        config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        with open(config_path, "w") as f:
+            f.write(VALID_CONFIG_YAML)
+
+        self.orchestrator = AggregationOrchestrator(
+            connection_id="conn",
+            project_id="proj",
+            instance_id="inst",
+            database_id="db",
+            config_file_path=config_path
+        )
 
-    def _get_mock_open(self, yaml_content):
-        """Helper to mock open() calls for both the config YAML and the schema JSON."""
-        def side_effect(path, *args, **kwargs):
-            if "schema.json" in path:
-                return mock_open(read_data=json.dumps(self.schema_json))().__enter__()
-            elif "aggregation.yaml" in path:
-                return mock_open(read_data=yaml_content)().__enter__()
-            raise FileNotFoundError(f"Mock open not configured for: {path}")
-        return side_effect
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
+    def test_has_stage(self, mock_executor):
+        """Tests the has_stage method for active, disabled, and non-matching stages."""
+        self.assertTrue(self.orchestrator.has_stage(1, ["AnyImport"]))
+        self.assertTrue(self.orchestrator.has_stage(1, ["USFed_Census"]))
 
+        self.assertFalse(self.orchestrator.has_stage(2, ["OtherImport"]))
+        self.assertTrue(self.orchestrator.has_stage(2, ["USFed_Census"]))
 
-    @patch('builtins.open')
-    def test_has_stage(self, mock_file_open, *mocks):
-        """Tests the has_stage method for active, disabled, and non-matching stages."""
-        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
-        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+        self.assertFalse(self.orchestrator.has_stage(3, ["USFed_Census"]))
 
-        # Stage 1: Has linked_edges (wildcard) and place (USFed_Census)
-        self.assertTrue(utils.has_stage(1, ["AnyImport"]))
-        self.assertTrue(utils.has_stage(1, ["USFed_Census"]))
+    def test_get_active_stages(self, mock_executor):
+        """Tests that get_active_stages correctly extracts, filters, and sorts active stages."""
+        stages = self.orchestrator.get_active_stages(active_imports=["USFed_Census"])
+        self.assertEqual(stages, [1, 2])
 
-        # Stage 2: Has place (disabled: true) and stat_var (USFed_Census)
-        # If active import is "OtherImport", Stage 2 has no active aggregations (stat_var doesn't match, place is disabled)
-        self.assertFalse(utils.has_stage(2, ["OtherImport"]))
-        # If active import is "USFed_Census", Stage 2 has stat_var active
-        self.assertTrue(utils.has_stage(2, ["USFed_Census"]))
+        stages = self.orchestrator.get_active_stages(active_imports=["OtherImport"])
+        self.assertEqual(stages, [1])
 
-        # Stage 3: Does not exist in config
-        self.assertFalse(utils.has_stage(3, ["USFed_Census"]))
 
-    @patch('builtins.open')
-    def test_get_active_stages(self, mock_file_open, *mocks):
-        """Tests that get_active_stages correctly extracts, filters, and sorts active stages."""
-        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
+@patch('aggregation.orchestrator.BigQueryExecutor')
+@patch('aggregation.orchestrator.PlaceAggregationGenerator')
+@patch('aggregation.orchestrator.StatVarAggregator')
+@patch('aggregation.orchestrator.LinkedEdgeGenerator')
+@patch('aggregation.orchestrator.ProvenanceSummaryGenerator')
+@patch('aggregation.orchestrator.StatVarGroupGenerator')
+class TestOrchestratorExecution(unittest.TestCase):
+    """Tests stage execution, verifying parallel job submission and routing.
 
-        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+    These tests execute stages, so they mock the executor and all five generators
+    to verify correct parameters are passed and jobs are collected.
+    """
 
-        # 1. For active import "USFed_Census":
-        # Stage 1 (linked_edges, place) and Stage 2 (stat_var) have active steps.
-        # The place rollup in Stage 2 is disabled, but the stat_var step is enabled and active.
-        # Therefore, active stages should be [1, 2].
-        stages = utils.get_active_stages(active_imports=["USFed_Census"])
-        self.assertEqual(stages, [1, 2])
+    def setUp(self):
+        self.tmpdir = tempfile.TemporaryDirectory()
+        config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        with open(config_path, "w") as f:
+            f.write(VALID_CONFIG_YAML)
+
+        self.orchestrator = AggregationOrchestrator(
+            connection_id="conn",
+            project_id="proj",
+            instance_id="inst",
+            database_id="db",
+            config_file_path=config_path
+        )
 
-        # 2. For active import "OtherImport":
-        # Stage 1 (linked_edges) matches via wildcard.
-        # Stage 2 (place rollup is disabled, stat_var does not match "OtherImport").
-        # Therefore, only Stage 1 is active. Active stages should be [1].
-        stages = utils.get_active_stages(active_imports=["OtherImport"])
-        self.assertEqual(stages, [1])
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
-    @patch('builtins.open')
-    def test_execute_stage_1(self, mock_file_open, 
-                             mock_svg_gen, mock_prov_gen, mock_edge_gen, 
+    def test_execute_stage_1(self, mock_svg_gen, mock_prov_gen, mock_edge_gen, 
                              mock_sv_agg, mock_place_gen, mock_executor):
         """Tests executing Stage 1, verifying parallel job submission and wildcard resolution."""
-        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
-        
-        # Setup generator mocks to return mock jobs
         mock_job1 = MagicMock()
         mock_job1.job_id = "job-edge-1"
         mock_edge_gen.return_value.run_all.return_value = [mock_job1]
@@ -135,20 +136,14 @@ def test_execute_stage_1(self, mock_file_open,
         mock_job2.job_id = "job-place-1"
         mock_place_gen.return_value.aggregate_places.return_value = mock_job2
 
-        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
+        job_ids = self.orchestrator.execute_stage(stage_num=1, active_imports=["USFed_Census"])
 
-        # Execute Stage 1 for active imports: ["USFed_Census"]
-        job_ids = utils.execute_stage(stage_num=1, active_imports=["USFed_Census"])
-
-        # Assertions
         self.assertEqual(len(job_ids), 2)
         self.assertIn("job-edge-1", job_ids)
         self.assertIn("job-place-1", job_ids)
 
-        # Verify linked_edges ran for ALL active imports (wildcard '*')
         mock_edge_gen.return_value.run_all.assert_called_once_with(["USFed_Census"])
         
-        # Verify place rollup ran for matching import "USFed_Census"
         mock_place_gen.return_value.aggregate_places.assert_called_once_with(
             import_names=["USFed_Census"],
             source_type="County",
@@ -156,33 +151,22 @@ def test_execute_stage_1(self, mock_file_open,
             allow_multiple_to_places=False
         )
 
-    @patch('builtins.open')
-    def test_execute_stage_2_with_disabled_and_filtering(self, mock_file_open, 
-                                                         mock_svg_gen, mock_prov_gen, mock_edge_gen, 
+    def test_execute_stage_2_with_disabled_and_filtering(self, mock_svg_gen, mock_prov_gen, mock_edge_gen, 
                                                          mock_sv_agg, mock_place_gen, mock_executor):
         """Tests Stage 2, verifying that disabled steps are skipped and non-matching imports are filtered."""
-        mock_file_open.side_effect = self._get_mock_open(VALID_CONFIG_YAML)
-
-        # Setup mock for Stage 2 stat_var job
         mock_job_sv = MagicMock()
         mock_job_sv.job_id = "job-sv-1"
         mock_sv_agg.return_value.aggregate_stat_vars.return_value = [mock_job_sv]
 
-        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
-
-        # 1. Run for an import that does NOT match stat_var ("OtherImport")
-        # Since the place rollup in Stage 2 is disabled, no jobs should run at all.
-        job_ids = utils.execute_stage(stage_num=2, active_imports=["OtherImport"])
+        job_ids = self.orchestrator.execute_stage(stage_num=2, active_imports=["OtherImport"])
         self.assertEqual(len(job_ids), 0)
         mock_place_gen.return_value.aggregate_places.assert_not_called()
         mock_sv_agg.return_value.aggregate_stat_vars.assert_not_called()
 
-        # 2. Run for matching import "USFed_Census"
-        # The disabled place rollup should still be skipped, but the stat_var aggregation should execute.
-        job_ids = utils.execute_stage(stage_num=2, active_imports=["USFed_Census"])
+        job_ids = self.orchestrator.execute_stage(stage_num=2, active_imports=["USFed_Census"])
         
         self.assertEqual(job_ids, ["job-sv-1"])
-        mock_place_gen.return_value.aggregate_places.assert_not_called() # Still skipped because disabled: true
+        mock_place_gen.return_value.aggregate_places.assert_not_called()
         mock_sv_agg.return_value.aggregate_stat_vars.assert_called_once_with(
             ancestor_sv="Count_Person",
             source_svs=["Count_Person_Male", "Count_Person_Female"],
@@ -191,26 +175,33 @@ def test_execute_stage_2_with_disabled_and_filtering(self, mock_file_open,
             skip_all_sources_present_check=True
         )
 
-    @patch('builtins.open')
-    def test_execute_stage_unsupported_type(self, mock_file_open, *mocks):
+    def test_execute_stage_unsupported_type(self, *mocks):
         """Tests that an unsupported aggregation step type raises ValueError."""
-        # Use 'entity' which is valid in schema but unimplemented in orchestrator
-        unimplemented_config = """
-        aggregations:
-          - type: entity
-            entity_types: ["MortalityEvent"]
-            location_props: ["location"]
-            imports: ["*"]
-            stage: 1
-        """
-        mock_file_open.side_effect = self._get_mock_open(unimplemented_config)
-
-        utils = AggregationOrchestrator(connection_id="conn", project_id="proj", instance_id="inst", database_id="db")
-
-        # Running Stage 1 should raise ValueError due to unimplemented "entity" type
-        with self.assertRaises(ValueError) as ctx:
-            utils.execute_stage(stage_num=1, active_imports=["USFed_Census"])
-        self.assertIn("Unsupported or unimplemented aggregation step type: entity", str(ctx.exception))
+        unimplemented_config = textwrap.dedent("""\
+            aggregations:
+              - type: entity
+                entity_types: ["MortalityEvent"]
+                location_props: ["location"]
+                imports: ["*"]
+                stage: 1
+        """)
+        
+        with tempfile.TemporaryDirectory() as local_tmpdir:
+            local_config_path = os.path.join(local_tmpdir, "aggregation.yaml")
+            with open(local_config_path, "w") as f:
+                f.write(unimplemented_config)
+            
+            local_orchestrator = AggregationOrchestrator(
+                connection_id="conn",
+                project_id="proj",
+                instance_id="inst",
+                database_id="db",
+                config_file_path=local_config_path
+            )
+            
+            with self.assertRaises(ValueError) as ctx:
+                local_orchestrator.execute_stage(stage_num=1, active_imports=["USFed_Census"])
+            self.assertIn("Unsupported or unimplemented aggregation step type: entity", str(ctx.exception))
 
 
 if __name__ == '__main__':
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
index 34978f58d..44cd56d82 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Unit tests for the aggregation configuration validator."""
+"""Unit tests for the aggregation configuration validator using real temporary files."""
 
-import json
 import os
 import sys
+import tempfile
+import textwrap
 import unittest
-from unittest.mock import mock_open, patch
 import jsonschema
 import yaml
 
@@ -26,283 +26,310 @@
 
 from aggregation import validate_config
 
-# =============================================================================
-# Mock YAML Configurations for Testing
-# =============================================================================
-
-# 1. A perfectly valid config containing all possible types and fields
-VALID_ALL_TYPES_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: ["*"]
-    stage: 1
-    disabled: false
-
-  - type: place
-    source_type: County
-    destination_type: State
-    allow_multiple_to_places: true
-    imports: ["ImportA", "ImportB"]
-    stage: 2
-
-  - type: stat_var
-    ancestor_sv_id: Count_Person
-    source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
-    skip_all_sources_present_check: true
-    output_import_name: "Aggregated_Pop"
-    imports: ["ImportC"]
-    stage: 3
-
-  - type: entity
-    entity_types: ["MortalityEvent"]
-    location_props: ["location"]
-    date_prop: "date"
-    agg_date_formats: ["%Y"]
-    imports: ["ImportD"]
-
-  - type: provenance_summary
-    imports: ["*"]
-
-  - type: stat_var_groups
-    imports: ["*"]
-"""
-
-# 2. Invalid: Missing required type field
-INVALID_MISSING_TYPE_YAML = """
-aggregations:
-  - imports: ["*"]
-"""
-
-# 3. Invalid: Missing required imports field
-INVALID_MISSING_IMPORTS_YAML = """
-aggregations:
-  - type: linked_edges
-"""
-
-# 4. Invalid: imports is a string instead of an array
-INVALID_IMPORTS_TYPE_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: "*"
-"""
-
-# 5. Invalid: stage is a string instead of an integer
-INVALID_STAGE_TYPE_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: ["*"]
-    stage: "first"
-"""
-
-# 6. Invalid: stage is 0 (minimum is 1)
-INVALID_STAGE_VALUE_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: ["*"]
-    stage: 0
-"""
-
-# 7. Invalid: empty imports list (minItems: 1)
-INVALID_EMPTY_IMPORTS_YAML = """
-aggregations:
-  - type: linked_edges
-    imports: []
-"""
-
-# 8. Invalid place rollup: missing required source_type
-INVALID_PLACE_MISSING_FIELD_YAML = """
-aggregations:
-  - type: place
-    destination_type: State
-    imports: ["*"]
-"""
-
-# 9. Invalid stat var: missing required source_sv_ids
-INVALID_STAT_VAR_MISSING_FIELD_YAML = """
-aggregations:
-  - type: stat_var
-    ancestor_sv_id: Count_Person
-    imports: ["*"]
-"""
-
-# 10. Invalid stat var: empty source_sv_ids list
-INVALID_STAT_VAR_EMPTY_SVS_YAML = """
-aggregations:
-  - type: stat_var
-    ancestor_sv_id: Count_Person
-    source_sv_ids: []
-    imports: ["*"]
-"""
-
-# 11. Invalid entity: missing required location_props
-INVALID_ENTITY_MISSING_FIELD_YAML = """
-aggregations:
-  - type: entity
-    entity_types: ["Event"]
-    imports: ["*"]
-"""
-
-# 12. Malformed YAML (Indentation error)
-MALFORMED_YAML = """
-aggregations:
-  - type: linked_edges
-  imports:
-  - "*"
-"""
-
-
-class TestConfigValidator(unittest.TestCase):
+
+class TestValidatorSuccess(unittest.TestCase):
+    """Verifies successful validation paths for valid configurations."""
 
     def setUp(self):
         # Load the actual schema from the workspace to ensure tests remain realistic
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
-        with open(self.schema_path, "r") as f:
-            self.schema_json = json.load(f)
-
-    def _get_mock_open(self, yaml_content):
-        """Helper to mock open() for both the schema JSON and the target YAML."""
-        def side_effect(path, *args, **kwargs):
-            if "schema.json" in path:
-                return mock_open(read_data=json.dumps(self.schema_json))().__enter__()
-            else:
-                return mock_open(read_data=yaml_content)().__enter__()
-        return side_effect
-
-    # =============================================================================
-    # Success Test Cases
-    # =============================================================================
-
-    @patch('builtins.open')
-    def test_validate_config_success_all_types(self, mock_file_open):
-        """Verifies that a comprehensive, valid config with all types passes validation."""
-        mock_file_open.side_effect = self._get_mock_open(VALID_ALL_TYPES_YAML)
-
-        aggregations = validate_config("aggregation.yaml", self.schema_path)
-        
-        self.assertEqual(len(aggregations), 6)
-        self.assertEqual(aggregations[0]["type"], "linked_edges")
-        self.assertEqual(aggregations[1]["source_type"], "County")
-        self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
-        self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
-
-    # =============================================================================
-    # Schema Constraint Test Cases
-    # =============================================================================
-
-    @patch('builtins.open')
-    def test_validate_config_missing_type(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_MISSING_TYPE_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("'type' is a required property", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_missing_imports(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_MISSING_IMPORTS_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("'imports' is a required property", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_invalid_imports_type(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_IMPORTS_TYPE_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("is not of type 'array'", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_invalid_stage_type(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_STAGE_TYPE_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("is not of type 'integer'", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_invalid_stage_value(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_STAGE_VALUE_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("is less than the minimum of 1", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_empty_imports_list(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_EMPTY_IMPORTS_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("should be non-empty", ctx.exception.message)
-
-    # =============================================================================
-    # Conditional Dependency Test Cases (OneOf/Dependencies)
-    # =============================================================================
-
-    @patch('builtins.open')
-    def test_validate_config_place_missing_field(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_PLACE_MISSING_FIELD_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        # Validation fails because place rollup requires source_type
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_stat_var_missing_field(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_STAT_VAR_MISSING_FIELD_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_stat_var_empty_source_svs(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_STAT_VAR_EMPTY_SVS_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("should be non-empty", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_entity_missing_field(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(INVALID_ENTITY_MISSING_FIELD_YAML)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
-
-    # =============================================================================
-    # File System & Syntax Error Test Cases
-    # =============================================================================
-
-    @patch('builtins.open')
-    def test_validate_config_yaml_syntax_error(self, mock_file_open):
-        mock_file_open.side_effect = self._get_mock_open(MALFORMED_YAML)
-        with self.assertRaises(yaml.YAMLError):
-            validate_config("aggregation.yaml", self.schema_path)
 
-    def test_validate_config_missing_config_file(self):
-        with self.assertRaises(FileNotFoundError):
-            validate_config("non_existent_config.yaml", "schema.json")
+    def test_validate_config_success_all_types(self):
+        """Verifies that a comprehensive, valid config with all types passes validation."""
+        valid_all_types_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+                imports: ["*"]
+                stage: 1
+                disabled: false
+
+              - type: place
+                source_type: County
+                destination_type: State
+                allow_multiple_to_places: true
+                imports: ["ImportA", "ImportB"]
+                stage: 2
+
+              - type: stat_var
+                ancestor_sv_id: Count_Person
+                source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
+                skip_all_sources_present_check: true
+                output_import_name: "Aggregated_Pop"
+                imports: ["ImportC"]
+                stage: 3
+
+              - type: entity
+                entity_types: ["MortalityEvent"]
+                location_props: ["location"]
+                date_prop: "date"
+                agg_date_formats: ["%Y"]
+                imports: ["ImportD"]
+
+              - type: provenance_summary
+                imports: ["*"]
+
+              - type: stat_var_groups
+                imports: ["*"]
+        """)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(valid_all_types_yaml)
+
+            aggregations = validate_config(config_path, self.schema_path)
+            
+            self.assertEqual(len(aggregations), 6)
+            self.assertEqual(aggregations[0]["type"], "linked_edges")
+            self.assertEqual(aggregations[1]["source_type"], "County")
+            self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
+            self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
+
+
+class TestValidatorSchemaConstraints(unittest.TestCase):
+    """Verifies core schema constraint failures (types, required fields, values)."""
 
-    def test_validate_config_missing_schema_file(self):
-        # We patch os.path.exists to simulate config existing but schema missing
-        with patch('os.path.exists', side_effect=lambda path: "aggregation.yaml" in path):
-            with self.assertRaises(FileNotFoundError):
-                validate_config("aggregation.yaml", "non_existent_schema.json")
+    def setUp(self):
+        self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
 
-    @patch('builtins.open')
-    def test_validate_config_missing_aggregations_key(self, mock_file_open):
+    def test_validate_config_missing_type(self):
+        """Verifies that missing the required 'type' field raises ValidationError."""
+        invalid_missing_type_yaml = textwrap.dedent("""\
+            aggregations:
+              - imports: ["*"]
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_missing_type_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("'type' is a required property", ctx.exception.message)
+
+    def test_validate_config_missing_imports(self):
+        """Verifies that missing the required 'imports' field raises ValidationError."""
+        invalid_missing_imports_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_missing_imports_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("'imports' is a required property", ctx.exception.message)
+
+    def test_validate_config_invalid_imports_type(self):
+        """Verifies that imports field being a string instead of an array raises ValidationError."""
+        invalid_imports_type_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+                imports: "*"
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_imports_type_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is not of type 'array'", ctx.exception.message)
+
+    def test_validate_config_invalid_stage_type(self):
+        """Verifies that stage field being a string instead of an integer raises ValidationError."""
+        invalid_stage_type_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+                imports: ["*"]
+                stage: "first"
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_stage_type_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is not of type 'integer'", ctx.exception.message)
+
+    def test_validate_config_invalid_stage_value(self):
+        """Verifies that a stage value of 0 (minimum is 1) raises ValidationError."""
+        invalid_stage_value_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+                imports: ["*"]
+                stage: 0
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_stage_value_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is less than the minimum of 1", ctx.exception.message)
+
+    def test_validate_config_empty_imports_list(self):
+        """Verifies that an empty imports list raises ValidationError."""
+        invalid_empty_imports_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+                imports: []
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_empty_imports_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("should be non-empty", ctx.exception.message)
+
+    def test_validate_config_missing_aggregations_key(self):
         """Verifies that missing the required 'aggregations' root key raises ValidationError."""
-        missing_aggregations_yaml = """
-        some_other_key: []
-        """
-        mock_file_open.side_effect = self._get_mock_open(missing_aggregations_yaml)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("'aggregations' is a required property", ctx.exception.message)
-
-    @patch('builtins.open')
-    def test_validate_config_empty_file(self, mock_file_open):
+        missing_aggregations_yaml = textwrap.dedent("""\
+            some_other_key: []
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(missing_aggregations_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("'aggregations' is a required property", ctx.exception.message)
+
+    def test_validate_config_empty_file(self):
         """Verifies that a completely empty configuration file raises ValidationError."""
         empty_yaml = ""
-        mock_file_open.side_effect = self._get_mock_open(empty_yaml)
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config("aggregation.yaml", self.schema_path)
-        self.assertIn("'aggregations' is a required property", ctx.exception.message)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(empty_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("'aggregations' is a required property", ctx.exception.message)
+
+
+class TestValidatorConditionalDependencies(unittest.TestCase):
+    """Verifies type-specific conditional dependencies (OneOf / dependencies)."""
+
+    def setUp(self):
+        self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+
+    def test_validate_config_place_missing_field(self):
+        """Verifies that a place step missing the required 'source_type' raises ValidationError."""
+        invalid_place_missing_field_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: place
+                destination_type: State
+                imports: ["*"]
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_place_missing_field_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+    def test_validate_config_stat_var_missing_field(self):
+        """Verifies that a stat_var step missing the required 'source_sv_ids' raises ValidationError."""
+        invalid_stat_var_missing_field_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: stat_var
+                ancestor_sv_id: Count_Person
+                imports: ["*"]
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_stat_var_missing_field_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+    def test_validate_config_stat_var_empty_source_svs(self):
+        """Verifies that a stat_var step with an empty source_sv_ids array raises ValidationError."""
+        invalid_stat_var_empty_svs_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: stat_var
+                ancestor_sv_id: Count_Person
+                source_sv_ids: []
+                imports: ["*"]
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_stat_var_empty_svs_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("should be non-empty", ctx.exception.message)
+
+    def test_validate_config_entity_missing_field(self):
+        """Verifies that an entity step missing the required 'location_props' raises ValidationError."""
+        invalid_entity_missing_field_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: entity
+                entity_types: ["Event"]
+                imports: ["*"]
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(invalid_entity_missing_field_yaml)
+
+            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+                validate_config(config_path, self.schema_path)
+            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+
+
+class TestValidatorErrorsAndFileSystem(unittest.TestCase):
+    """Verifies file-system issues and non-schema parsing errors (YAML syntax)."""
+
+    def setUp(self):
+        self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+
+    def test_validate_config_yaml_syntax_error(self):
+        """Verifies that malformed YAML syntax raises YAMLError."""
+        malformed_yaml = textwrap.dedent("""\
+            aggregations:
+              - type: linked_edges
+              imports:
+              - "*"
+        """)
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            with open(config_path, "w") as f:
+                f.write(malformed_yaml)
+
+            with self.assertRaises(yaml.YAMLError):
+                validate_config(config_path, self.schema_path)
+
+    def test_validate_config_missing_config_file(self):
+        """Verifies that a missing config file path raises FileNotFoundError."""
+        with self.assertRaises(FileNotFoundError):
+            validate_config("non_existent_config.yaml", self.schema_path)
+
+    def test_validate_config_missing_schema_file(self):
+        """Verifies that a missing schema file path raises FileNotFoundError."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            config_path = os.path.join(tmpdir, "aggregation.yaml")
+            # Create a real, valid config file so it exists
+            with open(config_path, "w") as f:
+                f.write("aggregations: []")
+            
+            # Pass the real config path, but a non-existent schema path
+            with self.assertRaises(FileNotFoundError) as ctx:
+                validate_config(config_path, "non_existent_schema.json")
+            self.assertIn("JSON Schema file not found", str(ctx.exception))
 
 
 if __name__ == '__main__':

From a26d9d4a10d3c71750a2cad6ca0d3bbcac02dbec Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 23:47:02 +0530
Subject: [PATCH 15/33] refactor(aggregation): polish orchestrator logging and
 flatten unit test suites

---
 .../aggregation/orchestrator.py               |   4 +-
 .../aggregation/orchestrator_test.py          |   2 +-
 .../aggregation/validator_test.py             | 214 +++++++++---------
 3 files changed, 103 insertions(+), 117 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index cfb931125..4ef337aa8 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -48,8 +48,6 @@ def __init__(self,
             config_file_path: Optional custom path to the aggregation.yaml file.
                 If not specified, defaults to the aggregation.yaml in the parent directory.
         """
-        # Always run asynchronously at the executor level for stages to run in parallel
-        # We handle sequential blocking between stages at the workflow/router level
         self.executor = BigQueryExecutor(connection_id=connection_id,
                                          project_id=project_id,
                                          instance_id=instance_id,
@@ -78,7 +76,7 @@ def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
         Returns:
             A list of BigQuery job IDs submitted for this stage.
         """
-        logging.info(f"=== Starting Aggregation Orchestration for Stage {stage_num} ===")
+        logging.info(f"Starting Aggregation Orchestration for Stage {stage_num}")
         logging.info(f"Active imports in this run: {active_imports}")
         jobs = []
 
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 435d5a658..a0b2627f4 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Unit tests for the AggregationOrchestrator class using real temporary files."""
+"""Unit tests for the AggregationOrchestrator class."""
 
 import json
 import os
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
index 44cd56d82..617e43a97 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -31,8 +31,12 @@ class TestValidatorSuccess(unittest.TestCase):
     """Verifies successful validation paths for valid configurations."""
 
     def setUp(self):
-        # Load the actual schema from the workspace to ensure tests remain realistic
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
     def test_validate_config_success_all_types(self):
         """Verifies that a comprehensive, valid config with all types passes validation."""
@@ -72,18 +76,16 @@ def test_validate_config_success_all_types(self):
                 imports: ["*"]
         """)
 
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(valid_all_types_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(valid_all_types_yaml)
 
-            aggregations = validate_config(config_path, self.schema_path)
-            
-            self.assertEqual(len(aggregations), 6)
-            self.assertEqual(aggregations[0]["type"], "linked_edges")
-            self.assertEqual(aggregations[1]["source_type"], "County")
-            self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
-            self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
+        aggregations = validate_config(self.config_path, self.schema_path)
+        
+        self.assertEqual(len(aggregations), 6)
+        self.assertEqual(aggregations[0]["type"], "linked_edges")
+        self.assertEqual(aggregations[1]["source_type"], "County")
+        self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
+        self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
 
 
 class TestValidatorSchemaConstraints(unittest.TestCase):
@@ -91,6 +93,11 @@ class TestValidatorSchemaConstraints(unittest.TestCase):
 
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
     def test_validate_config_missing_type(self):
         """Verifies that missing the required 'type' field raises ValidationError."""
@@ -98,14 +105,12 @@ def test_validate_config_missing_type(self):
             aggregations:
               - imports: ["*"]
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_missing_type_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_missing_type_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("'type' is a required property", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("'type' is a required property", ctx.exception.message)
 
     def test_validate_config_missing_imports(self):
         """Verifies that missing the required 'imports' field raises ValidationError."""
@@ -113,14 +118,12 @@ def test_validate_config_missing_imports(self):
             aggregations:
               - type: linked_edges
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_missing_imports_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_missing_imports_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("'imports' is a required property", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("'imports' is a required property", ctx.exception.message)
 
     def test_validate_config_invalid_imports_type(self):
         """Verifies that imports field being a string instead of an array raises ValidationError."""
@@ -129,14 +132,12 @@ def test_validate_config_invalid_imports_type(self):
               - type: linked_edges
                 imports: "*"
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_imports_type_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_imports_type_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is not of type 'array'", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is not of type 'array'", ctx.exception.message)
 
     def test_validate_config_invalid_stage_type(self):
         """Verifies that stage field being a string instead of an integer raises ValidationError."""
@@ -146,14 +147,12 @@ def test_validate_config_invalid_stage_type(self):
                 imports: ["*"]
                 stage: "first"
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_stage_type_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_stage_type_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is not of type 'integer'", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is not of type 'integer'", ctx.exception.message)
 
     def test_validate_config_invalid_stage_value(self):
         """Verifies that a stage value of 0 (minimum is 1) raises ValidationError."""
@@ -163,14 +162,12 @@ def test_validate_config_invalid_stage_value(self):
                 imports: ["*"]
                 stage: 0
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_stage_value_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_stage_value_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is less than the minimum of 1", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is less than the minimum of 1", ctx.exception.message)
 
     def test_validate_config_empty_imports_list(self):
         """Verifies that an empty imports list raises ValidationError."""
@@ -179,40 +176,34 @@ def test_validate_config_empty_imports_list(self):
               - type: linked_edges
                 imports: []
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_empty_imports_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_empty_imports_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("should be non-empty", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("should be non-empty", ctx.exception.message)
 
     def test_validate_config_missing_aggregations_key(self):
         """Verifies that missing the required 'aggregations' root key raises ValidationError."""
         missing_aggregations_yaml = textwrap.dedent("""\
             some_other_key: []
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(missing_aggregations_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(missing_aggregations_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("'aggregations' is a required property", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("'aggregations' is a required property", ctx.exception.message)
 
     def test_validate_config_empty_file(self):
         """Verifies that a completely empty configuration file raises ValidationError."""
         empty_yaml = ""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(empty_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(empty_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("'aggregations' is a required property", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("'aggregations' is a required property", ctx.exception.message)
 
 
 class TestValidatorConditionalDependencies(unittest.TestCase):
@@ -220,6 +211,11 @@ class TestValidatorConditionalDependencies(unittest.TestCase):
 
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
     def test_validate_config_place_missing_field(self):
         """Verifies that a place step missing the required 'source_type' raises ValidationError."""
@@ -229,14 +225,12 @@ def test_validate_config_place_missing_field(self):
                 destination_type: State
                 imports: ["*"]
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_place_missing_field_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_place_missing_field_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
 
     def test_validate_config_stat_var_missing_field(self):
         """Verifies that a stat_var step missing the required 'source_sv_ids' raises ValidationError."""
@@ -246,14 +240,12 @@ def test_validate_config_stat_var_missing_field(self):
                 ancestor_sv_id: Count_Person
                 imports: ["*"]
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_stat_var_missing_field_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_stat_var_missing_field_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
 
     def test_validate_config_stat_var_empty_source_svs(self):
         """Verifies that a stat_var step with an empty source_sv_ids array raises ValidationError."""
@@ -264,14 +256,12 @@ def test_validate_config_stat_var_empty_source_svs(self):
                 source_sv_ids: []
                 imports: ["*"]
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_stat_var_empty_svs_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_stat_var_empty_svs_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("should be non-empty", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("should be non-empty", ctx.exception.message)
 
     def test_validate_config_entity_missing_field(self):
         """Verifies that an entity step missing the required 'location_props' raises ValidationError."""
@@ -281,14 +271,12 @@ def test_validate_config_entity_missing_field(self):
                 entity_types: ["Event"]
                 imports: ["*"]
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(invalid_entity_missing_field_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(invalid_entity_missing_field_yaml)
 
-            with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-                validate_config(config_path, self.schema_path)
-            self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
+            validate_config(self.config_path, self.schema_path)
+        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
 
 
 class TestValidatorErrorsAndFileSystem(unittest.TestCase):
@@ -296,6 +284,11 @@ class TestValidatorErrorsAndFileSystem(unittest.TestCase):
 
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+
+    def tearDown(self):
+        self.tmpdir.cleanup()
 
     def test_validate_config_yaml_syntax_error(self):
         """Verifies that malformed YAML syntax raises YAMLError."""
@@ -305,31 +298,26 @@ def test_validate_config_yaml_syntax_error(self):
               imports:
               - "*"
         """)
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            with open(config_path, "w") as f:
-                f.write(malformed_yaml)
+        with open(self.config_path, "w") as f:
+            f.write(malformed_yaml)
 
-            with self.assertRaises(yaml.YAMLError):
-                validate_config(config_path, self.schema_path)
+        with self.assertRaises(yaml.YAMLError):
+            validate_config(self.config_path, self.schema_path)
 
     def test_validate_config_missing_config_file(self):
         """Verifies that a missing config file path raises FileNotFoundError."""
-        with self.assertRaises(FileNotFoundError):
+        with self.assertRaises(FileNotFoundError) as ctx:
             validate_config("non_existent_config.yaml", self.schema_path)
+        self.assertIn("Aggregation config file not found", str(ctx.exception))
 
     def test_validate_config_missing_schema_file(self):
         """Verifies that a missing schema file path raises FileNotFoundError."""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            config_path = os.path.join(tmpdir, "aggregation.yaml")
-            # Create a real, valid config file so it exists
-            with open(config_path, "w") as f:
-                f.write("aggregations: []")
-            
-            # Pass the real config path, but a non-existent schema path
-            with self.assertRaises(FileNotFoundError) as ctx:
-                validate_config(config_path, "non_existent_schema.json")
-            self.assertIn("JSON Schema file not found", str(ctx.exception))
+        with open(self.config_path, "w") as f:
+            f.write("aggregations: []")
+
+        with self.assertRaises(FileNotFoundError) as ctx:
+            validate_config(self.config_path, "non_existent_schema.json")
+        self.assertIn("JSON Schema file not found", str(ctx.exception))
 
 
 if __name__ == '__main__':

From cfecadc362728849367238cfeeb5140cef3d0053 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Wed, 24 Jun 2026 23:51:15 +0530
Subject: [PATCH 16/33] style(aggregation): clean up and simplify
 aggregation.yaml comments

---
 pipeline/workflow/ingestion-helper/aggregation.yaml | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation.yaml b/pipeline/workflow/ingestion-helper/aggregation.yaml
index b6eb01acb..b8786aa51 100644
--- a/pipeline/workflow/ingestion-helper/aggregation.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation.yaml
@@ -1,15 +1,6 @@
-# =============================================================================
-# Data Commons Aggregation Configuration
-# =============================================================================
-# By default, this runs the standard global post-processing steps in parallel.
-# You can customize this file to add place-based rollups, statistical variable
-# aggregations, or to disable specific steps.
+# Data Commons Aggregation Configuration. See the README for details.
 
 aggregations:
-
-  # ---------------------------------------------------------------------------
-  # Stage 1: Standard Global Steps (Run in parallel by default)
-  # ---------------------------------------------------------------------------
   # Generates linkedContainedInPlace, linkedMemberOf, etc.
   - type: linked_edges
     imports: ["*"]

From 05ea5d9652e66e5e51860d54f604e2c986f6122f Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 15:25:51 +0530
Subject: [PATCH 17/33] feat(aggregation): add YAML aggregation configs

---
 .../aggregation/configs/entity.yaml           |  188 ++
 .../aggregation/configs/place.yaml            |  139 ++
 .../aggregation/configs/statvar.yaml          | 1791 +++++++++++++++++
 .../configs/statvar_calculation.yaml          |  324 +++
 .../aggregation/configs/statvar_series.yaml   |  449 +++++
 .../aggregation/configs/super_enum.yaml       |   25 +
 6 files changed, 2916 insertions(+)
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
new file mode 100644
index 000000000..dcf30500d
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
@@ -0,0 +1,188 @@
+calculations:
+
+  # Earthquakes
+  - type: ENTITY_AGGREGATION
+    output_import: EarthquakeUSGS_Agg
+    input_imports:
+      - EarthquakeUSGS
+    entity_aggregation:
+      entity_types:
+        - EarthquakeEvent
+      location_props:
+        - affectedPlace
+      date_prop: occurrenceTime
+      agg_date_formats:
+        - YYYY
+        - YYYY-MM
+      constraints:
+        - "magnitude: [3 - M]"
+        - "magnitude: [4 - M]"
+        - "magnitude: [5 - M]"
+        - "magnitude: [6 - M]"
+        - "magnitude: [7 - M]"
+        - "magnitude: [8 - M]"
+        - "magnitude: [9 - M]"
+        - "magnitude: [3 4 M]"
+        - "magnitude: [4 5 M]"
+        - "magnitude: [5 6 M]"
+        - "magnitude: [6 7 M]"
+        - "magnitude: [7 8 M]"
+        - "magnitude: [8 9 M]"
+
+  # Fires
+  - type: ENTITY_AGGREGATION
+    output_import: FireFAMWEB_Agg
+    input_imports:
+      - FireFAMWEB
+    entity_aggregation:
+      entity_types:
+        - WildlandFireEvent
+      location_props:
+        - location
+      date_prop: discoveryDate
+      agg_date_formats:
+        - YYYY
+
+  - type: ENTITY_AGGREGATION
+    output_import: FireWFIGS_Agg
+    input_imports:
+      - FireWFIGS
+    entity_aggregation:
+      entity_types:
+        - FireIncidentComplexEvent
+        - PrescribedFireEvent
+        - WildlandFireEvent
+      location_props:
+        - location
+      date_prop: discoveryDate
+      agg_date_formats:
+        - YYYY-MM
+        - YYYY
+
+  - type: ENTITY_AGGREGATION
+    output_import: NASA_VIIRSActiveFiresEvents_Agg
+    # Aggregate event counts for S2-cells of level 13
+    input_imports:
+      - NASA_VIIRSActiveFiresEvents
+    entity_aggregation:
+      entity_types:
+        - FireEvent
+      location_props:
+        - affectedPlace
+      date_prop: startDate
+      agg_date_formats:
+        - YYYY
+        - YYYY-MM
+
+  # Storms
+  - type: ENTITY_AGGREGATION
+    output_import: StormNOAA_Agg
+    input_imports:
+      - StormNOAA
+    entity_aggregation:
+      location_props:
+        - affectedPlace
+      date_prop: startDate
+      agg_date_formats:
+        - YYYY
+        - YYYY-MM
+      # The entity_types list below was obtained with the following query:
+      #
+      # SELECT DISTINCT object_id
+      # FROM `datcom-store.dc_kg_latest.Triple`
+      # WHERE prov_id='dc/svnv9g3' AND predicate = 'typeOf'
+      entity_types:
+        - MarineHighWindEvent
+        - HighWindEvent
+        - HeavyRainEvent
+        - DebrisFlowEvent
+        - DenseSmokeEvent
+        - TropicalDepressionEvent
+        - LandslideEvent
+        - DustDevilEvent
+        - WinterStormEvent
+        - MarineHailEvent
+        - TsunamiEvent
+        - BlizzardEvent
+        - RipCurrentEvent
+        - HeavySnowEvent
+        - ExtremeColdWindChillEvent
+        - MarineDenseFogEvent
+        - TropicalStormEvent
+        - StormSurgeTideEvent
+        - NorthernLightsEvent
+        - FrostFreezeEvent
+        - WildfireEvent
+        - SleetEvent
+        - ColdWindChillEvent
+        - MarineStrongWindEvent
+        - LightningEvent
+        - SneakerwaveEvent
+        - AstronomicalLowTideEvent
+        - MarineTropicalStormEvent
+        - VolcanicAshEvent
+        - CoastalFloodEvent
+        - GeoCoordinates
+        - HeavyWindEvent
+        - IceStormEvent
+        - StrongWindEvent
+        - WinterWeatherEvent
+        - VolcanicAshfallEvent
+        - ThunderstormWindEvent
+        - FlashFloodEvent
+        - HeatEvent
+        - DustStormEvent
+        - ExcessiveHeatEvent
+        - HailEvent
+        - AvalancheEvent
+        - MarineThunderstormWindEvent
+        - LakeshoreFloodEvent
+        - DroughtEvent
+        - HighSurfEvent
+        - HurricaneTyphoonEvent
+        - LakeEffectSnowEvent
+        - WaterspoutEvent
+        - MarineTropicalDepressionEvent
+        - SeicheEvent
+        - HighSnowEvent
+        - DenseFogEvent
+        - HurricaneEvent
+        - MarineHurricaneTyphoonEvent
+        - FloodEvent
+        - FunnelCloudEvent
+        - FreezingFogEvent
+        - TornadoEvent
+        - MarineLightningEvent
+        - StormEpisode
+
+  # Floods
+  - type: ENTITY_AGGREGATION
+    output_import: DynamicWorld_FloodEvents_Agg
+    # Aggregate event counts for S2-cells of level 13
+    input_imports:
+      - DynamicWorld_FloodEvents
+    entity_aggregation:
+      entity_types:
+        - FloodEvent
+      location_props:
+        - affectedPlace
+      date_prop: startDate
+      agg_date_formats:
+        - YYYY
+        - YYYY-MM
+
+  # Heat/cold temperature event counts
+  - type: ENTITY_AGGREGATION
+    output_import: TemperatureEvents_Agg
+    input_imports:
+      - TemperatureEvents
+    entity_aggregation:
+      entity_types:
+        - HeatTemperatureEvent
+        - ColdTemperatureEvent
+      location_props:
+        - affectedPlace
+      date_prop: startDate
+      agg_date_formats:
+        - YYYY
+        - YYYY-MM
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
new file mode 100644
index 000000000..6d1108f73
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
@@ -0,0 +1,139 @@
+calculations:
+  - type: PLACE_AGGREGATION
+    input_imports: CensusACS5YearSurvey
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+    round: 1
+    output_import: CensusACS5YearSurvey_AggCountry
+
+  - type: PLACE_AGGREGATION
+    input_imports: CensusSAHIE
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+    round: 1
+    output_import: CensusSAHIE_AggCountry
+
+  - type: PLACE_AGGREGATION
+    input_imports: CDCMortality
+    place_aggregation:
+      from_place_types: County
+      to_place_types: State
+    round: 1
+    output_import: CDCMortality_AggState
+
+  - type: PLACE_AGGREGATION
+    input_imports: CDCMortality_AggState
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+    round: 2
+    output_import: CDCMortality_AggState_AggCountry
+
+  # - type: PLACE_AGGREGATION
+  #   input_imports: FBIGovCrime
+  #   place_aggregation:
+  #     from_place_types: State
+  #     to_place_types: Country
+  #   round: 1
+  #   output_import: FBIGovCrime_AggCountry
+
+  - type: PLACE_AGGREGATION
+    input_imports: DEA_ARCOS
+    place_aggregation:
+      from_place_types: County
+      to_place_types: State
+    round: 1
+    output_import: DEA_ARCOS_AggState
+
+  - type: PLACE_AGGREGATION
+    input_imports: DEA_ARCOS_AggState
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+    round: 2
+    output_import: DEA_ARCOS_AggState_AggCountry
+
+  - type: PLACE_AGGREGATION
+    input_imports: EPA_EJSCREEN
+    place_aggregation:
+      from_place_types: CensusBlockGroup
+      to_place_types: CensusTract
+    round: 1
+    output_import: EPA_EJSCREEN_AggCensusTract
+
+  - type: PLACE_AGGREGATION
+    input_imports: EPA_EJSCREEN_AggCensusTract
+    place_aggregation:
+      from_place_types: CensusTract
+      to_place_types: County
+    round: 2
+    output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
+
+  - type: PLACE_AGGREGATION
+    input_imports: DeepSolar
+    place_aggregation:
+      from_place_types: CensusBlockGroup
+      to_place_types: CensusTract
+    round: 1
+    output_import: DeepSolar_AggCensusTract
+
+  - type: PLACE_AGGREGATION
+    input_imports: DeepSolar_AggCensusTract
+    place_aggregation:
+      from_place_types: CensusTract
+      to_place_types: County
+    round: 2
+    output_import: DeepSolar_AggCensusTract_AggCounty
+
+  - type: PLACE_AGGREGATION
+    input_imports: EPA_GHGRP
+    place_aggregation:
+      from_place_types: EpaReportingFacility
+      to_place_types: County
+    round: 1
+    output_import: EPA_GHGRP_AggCounty
+
+  - type: PLACE_AGGREGATION
+    input_imports: EPA_GHGRP
+    place_aggregation:
+      from_place_types: EpaReportingFacility
+      to_place_types: CensusZipCodeTabulationArea
+    round: 1
+    output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
+
+  - type: PLACE_AGGREGATION
+    input_imports: EPA_GHGRP_AggCounty
+    place_aggregation:
+      from_place_types: County
+      to_place_types: State
+    round: 2
+    output_import: EPA_GHGRP_AggCounty_AggState
+
+  - type: PLACE_AGGREGATION
+    input_imports: RFF_USGridGeo_WeatherVariabilityForecast
+    place_aggregation:
+      from_place_types: GeoGridPlace_0.25Deg
+      to_place_types: County
+      allow_multiple_to_places: true
+    round: 1
+    output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
+
+  - type: PLACE_AGGREGATION
+    input_imports: India_RBIStateDomesticProduct
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+      allow_multiple_to_places: true
+    round: 1
+    output_import: India_RBIStateDomesticProduct_AggCountry
+
+  - type: PLACE_AGGREGATION
+    input_imports: India_RBIStateDomesticProduct_StatVarAgg
+    place_aggregation:
+      from_place_types: State
+      to_place_types: Country
+      allow_multiple_to_places: true
+    round: 1
+    output_import: India_RBIStateDomesticProduct_StatVarAgg_AggCountry
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
new file mode 100644
index 000000000..fe6743c5b
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -0,0 +1,1791 @@
+calculations:
+
+#
+# Health Insurance Coverage
+# -------------------------
+#
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # 1. Census Table B18135 includes insurance coverage by disability-status and
+      #    age (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B18135).
+      #    We use that to compute the totals, both overall and by disability status.
+      #
+      # Total uninsured
+      # https://screenshot.googleplex.com/v4s43nzoCC9G5Qp
+        - ancestor_sv_id: Count_Person_NoHealthInsurance
+          source_sv_ids:
+            # No Disability
+            - dc/y0dvhk0sggzef
+            - dc/kdg05h55y45y6
+            - dc/9drszqwd2nef7
+            # With Disability
+            - dc/bew8kj6l7tv93
+            - dc/96dqj47csvmy8
+            - dc/qr4s77egv27q2
+      # Total insured
+      # https://screenshot.googleplex.com/ARqaVprXtizwETy
+        - ancestor_sv_id: Count_Person_WithHealthInsurance
+          source_sv_ids:
+            # No Disability
+            - dc/32mhsxvq7qsm4
+            - dc/2s2dkbb7gz038
+            - dc/dc8vqzkx18x0c
+            # With Disability
+            - dc/s5efzs4x817p5
+            - dc/9j39148yn79zf
+            - dc/p3v76jcvdx919
+      # Uninsured with no disability
+      # (This and the next 3 aggregations reuse subsets of the source SVs of the two aggregations above.)
+        - ancestor_sv_id: Count_Person_NoHealthInsurance_NoDisability
+          source_sv_ids:
+            # No Disability
+            - dc/y0dvhk0sggzef
+            - dc/kdg05h55y45y6
+            - dc/9drszqwd2nef7
+      # Uninsured with disability
+        - ancestor_sv_id: Count_Person_NoHealthInsurance_WithDisability
+          source_sv_ids:
+            # With Disability
+            - dc/bew8kj6l7tv93
+            - dc/96dqj47csvmy8
+            - dc/qr4s77egv27q2
+      # Insured with no disability
+        - ancestor_sv_id: Count_Person_WithHealthInsurance_NoDisability
+          source_sv_ids:
+            # No Disability
+            - dc/32mhsxvq7qsm4
+            - dc/2s2dkbb7gz038
+            - dc/dc8vqzkx18x0c
+      # Insured with disability
+        - ancestor_sv_id: Count_Person_WithHealthInsurance_WithDisability
+          source_sv_ids:
+            # With Disability
+            - dc/s5efzs4x817p5
+            - dc/9j39148yn79zf
+            - dc/p3v76jcvdx919
+      #
+      # 2. Census Table B27001 includes insurance coverage broken down by age and
+      #    gender (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B27001). We
+      #    use that to compute the gender breakdown.
+      #
+      # https://screenshot.googleplex.com/aSP3i8ASJDxct2C
+        - ancestor_sv_id: Count_Person_Female_NoHealthInsurance
+          source_sv_ids:
+            - dc/jx2q10tbnwhf3
+            - dc/g1957rtkkrnpg
+            - dc/kbd1j4tv1r383
+            - dc/xkfbhgbnzhm8d
+            - dc/001qmh9sdqzeb
+            - dc/qhwj3qx90h4pd
+            - dc/jydkzj9v0yb2
+            - dc/qb6th3c5tcd92
+            - dc/zj152fzswgrw6
+      # https://screenshot.googleplex.com/57jf9CpaC7KGd7y
+        - ancestor_sv_id: Count_Person_Male_NoHealthInsurance
+          source_sv_ids:
+            - dc/fwqlkjjch0dg5
+            - dc/1qdvp8x0t3l11
+            - dc/33z5t6xqjepp1
+            - dc/260gxgmeglpb
+            - dc/l8j7ggjt8yx8b
+            - dc/4rdkjc2485w42
+            - dc/nn4xz2lv98h5c
+            - dc/ywjqcxc244sx2
+            - dc/j88b1fy7045j1
+      # https://screenshot.googleplex.com/5buzauZqyhTdnkV
+        - ancestor_sv_id: Count_Person_Female_WithHealthInsurance
+          source_sv_ids:
+            - dc/jjhxlkp3qk2m6
+            - dc/lwqlh3vtpzw51
+            - dc/1h5c2xbpd0edb
+            - dc/q4lk959j0bgq
+            - dc/1hs676pfqqthd
+            - dc/kkbq8kzlm6tw4
+            - dc/y9xxbzfr8yb06
+            - dc/whh1y53fj4p69
+            - dc/965mdb0c8tef9
+      # https://screenshot.googleplex.com/38JDwJJGKhNLmWM
+        - ancestor_sv_id: Count_Person_Male_WithHealthInsurance
+          source_sv_ids:
+            - dc/ve6s1v1eyrbt7
+            - dc/1n4mf17q78xtd
+            - dc/zyhtyhs9efej3
+            - dc/y6tzpetgq89v4
+            - dc/js56cehzj7wf4
+            - dc/yrmj9vp8b8n1d
+            - dc/wp3txgscfen9c
+            - dc/b3n86k0h3h2w
+            - dc/j8se7wv2gbdfd
+      # dc/g/Person_HealthInsurance-NoPrivateHealthInsurance
+      # Level 3
+        - ancestor_sv_id: Count_Person_NoPrivateHealthInsurance
+          source_sv_ids:
+            # Population: 26 - 34 Years, Female, No Private Health Insurance
+            # Population: 75 Years or More, Male, No Private Health Insurance
+            # Population: 6 Years or Less, Male, No Private Health Insurance
+            # Population: 55 - 64 Years, Female, No Private Health Insurance
+            # Population: 65 - 74 Years, Female, No Private Health Insurance
+            # Population: 18 - 24 Years, Female, No Private Health Insurance
+            # Population: 25 - 34 Years, Male, No Private Health Insurance
+            # Population: 45 - 54 Years, Female, No Private Health Insurance
+            # Population: 35 - 44 Years, Female, No Private Health Insurance
+            # Population: 19 - 25 Years, Male, No Private Health Insurance
+            # Population: 6 - 17 Years, Female, No Private Health Insurance
+            # Population: 26 - 34 Years, Male, No Private Health Insurance
+            # Population: 45 - 54 Years, Male, No Private Health Insurance
+            # Population: 6 Years or Less, Female, No Private Health Insurance
+            # Population: 6 - 18 Years, Male, No Private Health Insurance
+            # Population: 19 - 25 Years, Female, No Private Health Insurance
+            # Population: 25 - 34 Years, Female, No Private Health Insurance
+            # Population: 55 - 64 Years, Male, No Private Health Insurance
+            # Population: 65 - 74 Years, Male, No Private Health Insurance
+            # Population: 6 - 18 Years, Female, No Private Health Insurance
+            # Population: 18 - 24 Years, Male, No Private Health Insurance
+            # Population: 75 Years or More, Female, No Private Health Insurance
+            # Population: 35 - 44 Years, Male, No Private Health Insurance
+            # Population: 6 - 17 Years, Male, No Private Health Insurance
+      # dc/g/Person_HealthInsurance-NoPublicHealthInsurance
+      # Level 3
+        - ancestor_sv_id: Count_Person_NoPublicHealthInsurance
+          source_sv_ids:
+            # Population: 26 - 34 Years, Female, No Public Health Insurance
+            # Population: 65 - 74 Years, Male, No Public Health Insurance
+            # Population: 19 - 25 Years, Female, No Public Health Insurance
+            # Population: 45 - 54 Years, Female, No Public Health Insurance
+            # Population: 26 - 34 Years, Male, No Public Health Insurance
+            # Population: 35 - 44 Years, Male, No Public Health Insurance
+            # Population: 19 - 25 Years, Male, No Public Health Insurance
+            # Population: 6 - 18 Years, Male, No Public Health Insurance
+            # Population: 65 - 74 Years, Female, No Public Health Insurance
+            # Population: 45 - 54 Years, Male, No Public Health Insurance
+            # Population: 75 Years or More, Male, No Public Health Insurance
+            # Population: 6 - 18 Years, Female, No Public Health Insurance
+            # Population: 35 - 44 Years, Female, No Public Health Insurance
+            # Population: 55 - 64 Years, Male, No Public Health Insurance
+            # Population: 6 - 17 Years, Male, No Public Health Insurance
+            # Population: 18 - 24 Years, Female, No Public Health Insurance
+            # Population: 18 - 24 Years, Male, No Public Health Insurance
+            # Population: 75 Years or More, Female, No Public Health Insurance
+            # Population: 6 Years or Less, Female, No Public Health Insurance
+            # Population: 25 - 34 Years, Male, No Public Health Insurance
+            # Population: 6 - 17 Years, Female, No Public Health Insurance
+            # Population: 6 Years or Less, Male, No Public Health Insurance
+            # Population: 55 - 64 Years, Female, No Public Health Insurance
+            # Population: 25 - 34 Years, Female, No Public Health Insurance
+      # dc/g/Person_HealthInsurance-WithOneTypeOfHealthInsurance
+      # Level 1
+        - ancestor_sv_id: Count_Person_WithOneTypeOfHealthInsurance
+          source_sv_ids:
+            # Population: 18 Years or Less, With One Type of Health Insurance
+            # Population: 18 - 34 Years, With One Type of Health Insurance
+            # Population: 35 - 64 Years, With One Type of Health Insurance
+            # Population: 19 Years or Less, With One Type of Health Insurance
+            # Population: 19 - 34 Years, With One Type of Health Insurance
+            # Population: 65 Years or More, With One Type of Health Insurance
+      # dc/g/Person_HealthInsurance-WithPublicCoverage
+      # Level 3
+        - ancestor_sv_id: Count_Person_WithPublicCoverage
+          source_sv_ids:
+            # Population: 18 Years or Less, With Disability, With Public Coverage
+            # Population: 19 Years or Less, With Disability, With Public Coverage
+            # Population: 19 Years or Less, No Disability, With Public Coverage
+            # Population: 65 Years or More, No Disability, With Public Coverage
+            # Population: 19 - 64 Years, No Disability, With Public Coverage
+            # Population: 65 Years or More, With Disability, With Public Coverage
+            # Population: 18 - 64 Years, With Disability, With Public Coverage
+            # Population: 18 Years or Less, No Disability, With Public Coverage
+            # Population: 18 - 64 Years, No Disability, With Public Coverage
+            # Population: 19 - 64 Years, With Disability, With Public Coverage
+
+      #
+      # Ability to speak English
+      # ------------------------
+      #
+      # We aggregate abilityToSpeakEnglish over nativity (2) and languageSpokenAtHome
+      # (4) for a total of 8 source SVs from Census Table B16005
+      #   (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B16005)
+      #
+      # NOTE: Values SpeakEnglishLessThanVeryWell and SpeakEnglishVeryWell already
+      # have population values from a different table.
+      #
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_AbilityToSpeakEnglish_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # https://screenshot.googleplex.com/3Fw42DhNnD5fPLi
+        - ancestor_sv_id: Count_Person_SpeakEnglishNotAtAll
+          source_sv_ids:
+            - dc/edfjy64gmxf6f
+            - dc/0165994v0l8fh
+            - dc/0ltdeskbvkn3d
+            - dc/ves02m300f6lc
+            - dc/vh52n3qgw0kt
+            - dc/6yzk8t4e9t6v6
+            - dc/0kx32ff9c6d79
+            - dc/7cxlkzf56zk26
+      # https://screenshot.googleplex.com/3ExaNScXGyyEP2c
+        - ancestor_sv_id: Count_Person_SpeakEnglishNotWell
+          source_sv_ids:
+            - dc/f8qh4hp1830dg
+            - dc/3f6nmhf0zhtd4
+            - dc/l1zr0zx1yje61
+            - dc/zh4sly3kmwbwg
+            - dc/e4jl4qe9xc4n9
+            - dc/6rzmvxpgqlww6
+            - dc/wf1rm5zx34dtg
+            - dc/4xykjw3v6n4t3
+      # https://screenshot.googleplex.com/AaaSNUMpQwHRvSZ
+        - ancestor_sv_id: Count_Person_SpeakEnglishWell
+          source_sv_ids:
+            - dc/4n1wg2c7hjem
+            - dc/hbqhxpwsdvyj8
+            - dc/ewchtlvjlljk5
+            - dc/7st63yed0l4x
+            - dc/vx85c3xzwg981
+            - dc/92mnzpclp7tbd
+            - dc/yx0mdj3dnk3mb
+            - dc/1x48nzrne2f88
+
+      #
+      # In Armed Forces
+      # ---------------
+      #
+      # Census Table B23001 includes the count of people in the armed forces (in
+      # which case being in the labor force is implied), broken down by gender and
+      # age. From that we aggregate to the overall total and to per-gender totals.
+      # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
+      #
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_InArmedForces_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # Women in armed forces
+      # https://screenshot.googleplex.com/3ik3W3g2eqMjkSm
+        - ancestor_sv_id: Count_Person_Female_InArmedForces
+          source_sv_ids:
+            - dc/173smewzddlb
+            - dc/fjd99f6xm3mz3
+            - dc/n9wqebpd8jyhc
+            - dc/7tej0l9s6x2rh
+            - dc/hqz1tck7sdrm2
+            - dc/tmw7srr91bj41
+            - dc/lzzl0rx7m1yf9
+            - dc/8d182v9j04mg4
+            - dc/lb3lb4mg82mph
+            - dc/g2m31qc7q1x64
+      # Men in armed forces
+      # https://screenshot.googleplex.com/6dvhYcbmtHsjifQ
+        - ancestor_sv_id: Count_Person_Male_InArmedForces
+          source_sv_ids:
+            - dc/vp1gqv00d2ql3
+            - dc/04nv03cmv6btf
+            - dc/rhgnypl16nh6c
+            - dc/mtqbxrg29f64h
+            - dc/gd0jznk5n2kb3
+            - dc/4f6fv8bdstp3b
+            - dc/rlrwr4f7r4pg7
+            - dc/x63tjfw28tzvc
+            - dc/fpx513jvf4xed
+            - dc/2zj8jthd4f563
+      # Armed forces population, from combining the above two sets of SVs.
+        - ancestor_sv_id: Count_Person_InArmedForces
+          source_sv_ids:
+            # Female
+            - dc/173smewzddlb
+            - dc/fjd99f6xm3mz3
+            - dc/n9wqebpd8jyhc
+            - dc/7tej0l9s6x2rh
+            - dc/hqz1tck7sdrm2
+            - dc/tmw7srr91bj41
+            - dc/lzzl0rx7m1yf9
+            - dc/8d182v9j04mg4
+            - dc/lb3lb4mg82mph
+            - dc/g2m31qc7q1x64
+            # Male
+            - dc/vp1gqv00d2ql3
+            - dc/04nv03cmv6btf
+            - dc/rhgnypl16nh6c
+            - dc/mtqbxrg29f64h
+            - dc/gd0jznk5n2kb3
+            - dc/4f6fv8bdstp3b
+            - dc/rlrwr4f7r4pg7
+            - dc/x63tjfw28tzvc
+            - dc/fpx513jvf4xed
+            - dc/2zj8jthd4f563
+
+      # Education - CensusACS5YearSurvey
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_Education_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # dc/g/Person_EducationalAttainment-5ThAnd6ThGrade
+      # Level 1
+        - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_5ThAnd6ThGrade
+          source_sv_ids:
+            # Population: 5th And 6th Grade, Female
+            # Population: 5th And 6th Grade, Male
+      # dc/g/Person_EducationalAttainment-7ThAnd8ThGrade
+      # Level 1
+        - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_7ThAnd8ThGrade
+          source_sv_ids:
+            # Population: 7th And 8th Grade, Female
+            # Population: 7th And 8th Grade, Male
+      # dc/g/Person_EducationalAttainment-9ThTo12ThGradeNoDiploma
+      # Level 3
+        - ancestor_sv_id: Count_Person_EducationalAttainment_9ThTo12ThGradeNoDiploma
+          source_sv_ids:
+            # Population: 18 - 24 Years, 9th To 12th Grade No Diploma, Female
+            # Population: 18 - 24 Years, 9th To 12th Grade No Diploma, Male
+            # Population: 25 - 34 Years, 9th To 12th Grade No Diploma, Female
+            # Population: 25 - 34 Years, 9th To 12th Grade No Diploma, Male
+            # Population: 35 - 44 Years, 9th To 12th Grade No Diploma, Female
+            # Population: 35 - 44 Years, 9th To 12th Grade No Diploma, Male
+            # Population: 45 - 64 Years, 9th To 12th Grade No Diploma, Female
+            # Population: 45 - 64 Years, 9th To 12th Grade No Diploma, Male
+            # Population: 65 Years or More, 9th To 12th Grade No Diploma, Female
+            # Population: 65 Years or More, 9th To 12th Grade No Diploma, Male
+      # dc/g/Person_EducationalAttainment-LessThan9ThGrade
+      # Level 3
+        - ancestor_sv_id: Count_Person_EducationalAttainment_LessThan9ThGrade
+          source_sv_ids:
+            # Population: 18 - 24 Years, Less Than 9th Grade, Female
+            # Population: 18 - 24 Years, Less Than 9th Grade, Male
+            # Population: 25 - 34 Years, Less Than 9th Grade, Female
+            # Population: 25 - 34 Years, Less Than 9th Grade, Male
+            # Population: 35 - 44 Years, Less Than 9th Grade, Female
+            # Population: 35 - 44 Years, Less Than 9th Grade, Male
+            # Population: 45 - 64 Years, Less Than 9th Grade, Female
+            # Population: 45 - 64 Years, Less Than 9th Grade, Male
+            # Population: 65 Years or More, Less Than 9th Grade, Female
+            # Population: 65 Years or More, Less Than 9th Grade, Male
+      # dc/g/Person_EducationalAttainment-LessThanHighSchoolDiploma
+      # Level 3
+        - ancestor_sv_id: Count_Person_EducationalAttainment_LessThanHighSchoolDiploma
+          source_sv_ids:
+            # Population: Less Than High School Diploma, Male, Two or More Races
+            # Population: Less Than High School Diploma, Male, Asian Alone
+            # Population: Less Than High School Diploma, Female, Hispanic or Latino
+            # Population: Less Than High School Diploma, Male, White Alone
+            # Population: Less Than High School Diploma, Female, Asian Alone
+            # Population: Less Than High School Diploma, Male, Hispanic or Latino
+            # Population: Less Than High School Diploma, Male, Some Other Race Alone
+            # Population: Less Than High School Diploma, Female, White Alone Not Hispanic or Latino
+            # Population: Less Than High School Diploma, Male, Black or African American Alone
+            # Population: Less Than High School Diploma, Female, Black or African American Alone
+            # Population: Less Than High School Diploma, Male, Native Hawaiian or Other Pacific Islander Alone
+            # Population: Less Than High School Diploma, Male, American Indian or Alaska Native Alone
+            # Population: Less Than High School Diploma, Female, Native Hawaiian or Other Pacific Islander Alone
+            # Population: Less Than High School Diploma, Female, Some Other Race Alone
+            # Population: Less Than High School Diploma, Female, American Indian or Alaska Native Alone
+            # Population: Less Than High School Diploma, Female, White Alone
+            # Population: Less Than High School Diploma, Male, White Alone Not Hispanic or Latino
+            # Population: Less Than High School Diploma, Female, Two or More Races
+      # dc/g/Person_EducationalAttainment-NurseryTo4ThGrade
+      # Level 1
+        - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_NurseryTo4ThGrade
+          source_sv_ids:
+            # Population: Nursery To 4th Grade, Female
+            # Population: Nursery To 4th Grade, Male
+      # dc/g/Person_EducationalAttainment-SomeCollegeNoDegree
+      # Level 3
+        - ancestor_sv_id: Count_Person_EducationalAttainment_SomeCollegeNoDegree
+          source_sv_ids:
+            # Population: 18 - 24 Years, Some College No Degree, Female
+            # Population: 18 - 24 Years, Some College No Degree, Male
+            # Population: 25 - 34 Years, Some College No Degree, Female
+            # Population: 25 - 34 Years, Some College No Degree, Male
+            # Population: 35 - 44 Years, Some College No Degree, Female
+            # Population: 35 - 44 Years, Some College No Degree, Male
+            # Population: 45 - 64 Years, Some College No Degree, Female
+            # Population: 45 - 64 Years, Some College No Degree, Male
+            # Population: 65 Years or More, Some College No Degree, Female
+            # Population: 65 Years or More, Some College No Degree, Male
+        - ancestor_sv_id: Count_Person_EducationalAttainment_1StTo12ThGrade
+          source_sv_ids:
+            - Count_Person_EducationalAttainment1StGrade
+            - Count_Person_EducationalAttainment2NdGrade
+            - Count_Person_EducationalAttainment3RdGrade
+            - Count_Person_EducationalAttainment4ThGrade
+            - Count_Person_EducationalAttainment5ThGrade
+            - Count_Person_EducationalAttainment6ThGrade
+            - Count_Person_EducationalAttainment7ThGrade
+            - Count_Person_EducationalAttainment8ThGrade
+            - Count_Person_EducationalAttainment9ThGrade
+            - Count_Person_EducationalAttainment10ThGrade
+            - Count_Person_EducationalAttainment11ThGrade
+            - Count_Person_EducationalAttainment12ThGradeNoDiploma
+
+      # The following 9 aggregations compute SomeCollegeOrAssociatesDegree by race, each summing the Female and Male source SVs.
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_AmericanIndianOrAlaskaNativeAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, American Indian or Alaska Native Alone
+            - dc/mqxdr821c7kw3
+            # Population: Some College or Associates Degree, Male, American Indian or Alaska Native Alone
+            - dc/lyt8y97bbrkpc
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_AsianAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Asian Alone
+            - dc/whn99h1l0xgth
+            # Population: Some College or Associates Degree, Male, Asian Alone
+            - dc/fkvnj4rlrs84f
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_BlackOrAfricanAmericanAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Black or African American Alone
+            - dc/56md3ndhrmvm7
+            # Population: Some College or Associates Degree, Male, Black or African American Alone
+            - dc/e7h67vn5w4g13
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_HispanicOrLatino
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Hispanic or Latino
+            - dc/3w039ndqy7qv1
+            # Population: Some College or Associates Degree, Male, Hispanic or Latino
+            - dc/7xf6mm0sg9y18
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_NativeHawaiianOrOtherPacificIslanderAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Native Hawaiian or Other Pacific Islander Alone
+            - dc/kpcfmb6lp3zpd
+            # Population: Some College or Associates Degree, Male, Native Hawaiian or Other Pacific Islander Alone
+            - dc/r3l3rfl1ms85f
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_SomeOtherRaceAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Some Other Race Alone
+            - dc/epw58ne8mytn5
+            # Population: Some College or Associates Degree, Male, Some Other Race Alone
+            - dc/t0mcpxqgr2lm9
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_TwoOrMoreRaces
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, Two or More Races
+            - dc/q98jxycvs422f
+            # Population: Some College or Associates Degree, Male, Two or More Races
+            - dc/zjlfv8d8v14f8
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_WhiteAlone
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, White Alone
+            - dc/9sneyc8lpk8dc
+            # Population: Some College or Associates Degree, Male, White Alone
+            - dc/d2ct8qmvcct81
+        - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_WhiteAloneNotHispanicOrLatino
+          source_sv_ids:
+            # Population: Some College or Associates Degree, Female, White Alone Not Hispanic or Latino
+            - dc/bqy52h7y4nq34
+            # Population: Some College or Associates Degree, Male, White Alone Not Hispanic or Latino
+            - dc/dc9v9h3q8l8n7
+
+      # Education - ACSED5YrSurvey
+  - type: STAT_VAR_AGGREGATION
+    output_import: ACSED5YrSurvey_StatVarAgg
+    input_imports:
+      - ACSED5YrSurvey
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Parent_Occupation_Management_Business_Science_Arts
+          source_sv_ids:
+            # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Management, Business, Science, And Arts Occupations
+            - dc/zvmdlctml84sb
+        - ancestor_sv_id: Count_Parent_Occupation_Natural_Resources_Construction_Maintenance
+          source_sv_ids:
+            # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Natural Resources, Construction, And Maintenance Occupations
+            - dc/fsrwm5cy84lmf
+        - ancestor_sv_id: Count_Parent_Occupation_Production_Transportation_Material_Moving
+          source_sv_ids:
+            # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Production, Transportation, And Material Moving Occupations
+            - dc/jbyb43crb1t37
+        - ancestor_sv_id: Count_Parent_Occupation_Sales_Office
+          source_sv_ids:
+            # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Sales And Office Occupations
+            - dc/p029jbzf00pw6
+        - ancestor_sv_id: Count_Parent_Occupation_Service
+          source_sv_ids:
+            # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Service Occupations
+            - dc/bstxmnb4k1wrb
+
+      # Demographics
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_Demographics_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_85OrMoreYears
+          source_sv_ids:
+            - Count_Person_85OrMoreYears_Male
+            - Count_Person_85OrMoreYears_Female
+        - ancestor_sv_id: Count_Person_0To4Years
+          source_sv_ids:
+            - Count_Person_0To4Years_Male
+            - Count_Person_0To4Years_Female
+
+      # Agriculture
+  - type: STAT_VAR_AGGREGATION
+    output_import: USDA_AgricultureCensus_Agriculture_StatVarAgg
+    input_imports:
+      - USDA_AgricultureCensus
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_Producer
+          source_sv_ids:
+            - Count_Person_Producer_BlackOrAfricanAmericanAlone
+            - Count_Person_Producer_AmericanIndianOrAlaskaNativeAlone
+            - Count_Person_Producer_NativeHawaiianOrOtherPacificIslanderAlone
+            - Count_Person_Producer_WhiteAlone
+            - Count_Person_Producer_TwoOrMoreRaces
+            - Count_Person_Producer_HispanicOrLatino
+            - Count_Person_Producer_AsianAlone
+          skip_all_sources_present_check: true
+
+      #
+      # Employment
+      # ----------
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_Employment_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSAgricultureForestryFishingHunting
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Agriculture, Forestry, Fishing And Hunting (NAICS/11)
+            - dc/scgpgdxgx9tr1
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Agriculture, Forestry, Fishing And Hunting (NAICS/11)
+            - dc/klz4pcgfe44rb
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSMiningQuarryingOilGasExtraction
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Mining, Quarrying, And Oil And Gas Extraction (NAICS/21)
+            - dc/jcq9116jwjvs3
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Mining, Quarrying, And Oil And Gas Extraction (NAICS/21)
+            - dc/xkywpwshj3ttb
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSUtilities
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Utilities (NAICS/22)
+            - dc/ewhc3j80tx5z1
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Utilities (NAICS/22)
+            - dc/hjepq5mk3yvy9
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSConstruction
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Construction (NAICS/23)
+            - dc/1h1rnmvtj706b
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Construction (NAICS/23)
+            - dc/0llhlrmmm5ce2
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSManufacturing
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Manufacturing (NAICS/31-33)
+            - dc/xsqgzwf1h8f33
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Manufacturing (NAICS/31-33)
+            - dc/ypmrvdtd4e159
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSWholesaleTrade
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Wholesale Trade (NAICS/42)
+            - dc/v2q0shf60bejd
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Wholesale Trade (NAICS/42)
+            - dc/3cn2vsmlfsfv1
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSRetailTrade
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Retail Trade (NAICS/44-45)
+            - dc/qcfd17k1c3vm9
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Retail Trade (NAICS/44-45)
+            - dc/3hh7cp878qgjb
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSTransportationWarehousing
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Transportation And Warehousing (NAICS/48-49)
+            - dc/cq266pvtkbll4
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Transportation And Warehousing (NAICS/48-49)
+            - dc/j9f2bf9fb5y1g
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSInformation
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Information (NAICS/51)
+            - dc/c9yj0kkxpf8ff
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Information (NAICS/51)
+            - dc/20cmnywrsexnc
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSFinanceInsurance
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Finance And Insurance (NAICS/52)
+            - dc/e7yf34hemnde9
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Finance And Insurance (NAICS/52)
+            - dc/3gcb6txyx3t88
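+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_Count_Establishment_NAICSRealEstateRentalLeasing  # NOTE(review): sibling entries are named ..._InLaborForce_NAICS*; confirm the embedded "Count_Establishment_" is intended.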
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Real Estate And Rental And Leasing (NAICS/53)
+            - dc/l292my8mjkns
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Real Estate And Rental And Leasing (NAICS/53)
+            - dc/4zgbhyej0lnd
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSProfessionalScientificTechnicalServices
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Professional, Scientific, And Technical Services (NAICS/54)
+            - dc/lc629dk4yeg4b
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Professional, Scientific, And Technical Services (NAICS/54)
+            - dc/gtbj7nt7jh608
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSManagementOfCompaniesEnterprises
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Management of Companies And Enterprises (NAICS/55)
+            - dc/zt2qhhtnjm4qh
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Management of Companies And Enterprises (NAICS/55)
+            - dc/z9qrge1jf1k3h
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSAdministrativeSupportWasteManagementRemediationServices
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Administrative And Support And Waste Management Services (NAICS/56)
+            - dc/ff2f4my72zx0c
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Administrative And Support And Waste Management Services (NAICS/56)
+            - dc/cmy92ne7lqr4d
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSEducationalServices
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Educational Services (NAICS/61)
+            - dc/1wr6sspk9nrjc
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Educational Services (NAICS/61)
+            - dc/xjvlbg6jjb6q8
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSHealthCareSocialAssistance
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Health Care And Social Assistance (NAICS/62)
+            - dc/nmd0vjps6psb2
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Health Care And Social Assistance (NAICS/62)
+            - dc/4gr6y83p0hvj
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSArtsEntertainmentRecreation
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Arts, Entertainment, And Recreation (NAICS/71)
+            - dc/9hrltcsyj2mcb
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Arts, Entertainment, And Recreation (NAICS/71)
+            - dc/0xmrxl7ztb4e7
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSAccommodationFoodServices
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Accommodation And Food Services (NAICS/72)
+            - dc/rydmsxhz7pxh1
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Accommodation And Food Services (NAICS/72)
+            - dc/pn22zf92qr31f
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSOtherServices
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Other Services, Except Public Administration (NAICS/81)
+            - dc/cyc1f26msdct4
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Other Services, Except Public Administration (NAICS/81)
+            - dc/ns2e3khxs6gw7
+        - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSPublicAdministration
+          source_sv_ids:
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Female, Public Administration (NAICS/92)
+            - dc/ylj424exymm1b
+            # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Public Administration (NAICS/92)
+            - dc/589p0gc36qem5
+
+      # Census Table B23001 includes count of employed civilians broken down by gender
+      # and age. From that we aggregate over age to get count of employed civilians
+      # for each gender.
+      # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
+      #
+#   - type: STAT_VAR_AGGREGATION
+#     # NOTE: disabled (kept commented out) until the armedForcesStatus mismatch noted below is fixed.
+#     output_import: CensusACS5YearSurvey_Employment_StatVarAgg
+#     input_imports:
+#       - CensusACS5YearSurvey
+#       - CensusACS5YearSurvey_AggCountry
+#     stat_var_aggregation:
+#       aggregations:
+#         # Women Employed
+#         # https://screenshot.googleplex.com/AYFWvJqXa3EB2eU
+#         #
+#         # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
+#         # don't have it. Fix it before adding this to the aggregation.
+#         # dc/kz49wc5n3lhpd
+#         # dc/qcpg8c533syd6
+#         # dc/zv26z833d7g3c
+#         - ancestor_sv_id: Count_Person_Female_Employed
+#           source_sv_ids:
+#             - dc/yt1fm72s1y7b7
+#             - dc/hj65vwnt5csr7
+#             - dc/0sd3x3bb4qet5
+#             - dc/j4mzcc63n5zq5
+#             - dc/56jhsezrzl049
+#             - dc/ggx918j9p5tnf
+#             - dc/8rw47nf3ngle7
+#             - dc/hplj99j7mbfsh
+#             - dc/ksmx7fwfkm8lb
+#             - dc/6706lc55kg5d
+#             - dc/qcpg8c533syd6
+#             - dc/zv26z833d7g3c
+#             - dc/kz49wc5n3lhpd
+#         #
+#         #
+#         # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
+#         # don't have it. Fix it before adding this to the aggregation.
+#         # dc/5cxs4br0jz02c
+#         # dc/s909dd4r22fw
+#         # dc/xepldf55yq6s5
+#         #
+#         # Men Employed
+#         # https://screenshot.googleplex.com/C24sFkxNbUpWiQJ
+#         - ancestor_sv_id: Count_Person_Male_Employed
+#           source_sv_ids:
+#             - dc/2s6hps4z1qced
+#             - dc/xvtk9180lme1h
+#             - dc/twdr4c500yev5
+#             - dc/n0btf3nglhqqc
+#             - dc/7qwvkhr4tsyt8
+#             - dc/pkpm179bbh822
+#             - dc/gyrw68q6x77l9
+#             - dc/ly0fvmlf4mtf5
+#             - dc/m020zpvzch2gd
+#             - dc/sgm602ncby3bf
+#             - dc/xepldf55yq6s5
+#             - dc/s909dd4r22fw
+#             - dc/5cxs4br0jz02c
+#
+
+  # Crime
+  - type: STAT_VAR_AGGREGATION
+    output_import: USNationalPrisonerStatistics_Crime_StatVarAgg
+    input_imports:
+      - USNationalPrisonerStatistics
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_CorrectionalFacilityLocation_OutOfState
+          source_sv_ids:
+            - dc/qgv9d3frn35qc
+            - dc/91vy0sf20wlg9
+
+  # Persons with own children under 18, by gender and in total.
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S1251_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S1251
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_WithOwnChildrenUnder18_Female
+          source_sv_ids:
+            - Count_Person_WithOwnChildrenUnder18_Female_FamilyHousehold_MarriedInThePast12Months_ResidesInHousehold
+            - Count_Person_WithOwnChildrenUnder18_Female_FamilyHousehold_DivorcedInThePast12Months_ResidesInHousehold
+        - ancestor_sv_id: Count_Person_WithOwnChildrenUnder18_Male
+          source_sv_ids:
+            - Count_Person_WithOwnChildrenUnder18_Male_FamilyHousehold_MarriedInThePast12Months_ResidesInHousehold
+            - Count_Person_WithOwnChildrenUnder18_Male_FamilyHousehold_DivorcedInThePast12Months_ResidesInHousehold
+        - ancestor_sv_id: Count_Person_WithOwnChildrenUnder18
+          source_sv_ids:
+            - Count_Person_WithOwnChildrenUnder18_Female
+            - Count_Person_WithOwnChildrenUnder18_Male
+
+  # Marital status of persons in the labor force (summed over gender).
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S1201_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S1201
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_InLaborForce_Divorced
+          source_sv_ids:
+            - Count_Person_InLaborForce_Female_Divorced
+            - Count_Person_InLaborForce_Male_Divorced
+        - ancestor_sv_id: Count_Person_InLaborForce_NeverMarried
+          source_sv_ids:
+            - Count_Person_InLaborForce_Female_NeverMarried
+            - Count_Person_InLaborForce_Male_NeverMarried
+        - ancestor_sv_id: Count_Person_InLaborForce_MarriedAndNotSeparated
+          source_sv_ids:
+            - Count_Person_InLaborForce_Female_MarriedAndNotSeparated
+            - Count_Person_InLaborForce_Male_MarriedAndNotSeparated
+        - ancestor_sv_id: Count_Person_InLaborForce_Widowed
+          source_sv_ids:
+            - Count_Person_InLaborForce_Female_Widowed
+            - Count_Person_InLaborForce_Male_Widowed
+
+  # Employment by business ownership type.
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S2408_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S2408
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_PrivatelyOwnedNotForProfitEstablishment_PaidWorker
+          source_sv_ids:
+            - Count_Person_PrivatelyOwnedNotForProfitEstablishment_Male_PaidWorker
+            - Count_Person_PrivatelyOwnedNotForProfitEstablishment_Female_PaidWorker
+        - ancestor_sv_id: Count_Person_PrivatelyOwnedForProfitEstablishment_PaidWorker
+          source_sv_ids:
+            - Count_Person_PrivatelyOwnedForProfitEstablishment_Male_PaidWorker
+            - Count_Person_PrivatelyOwnedForProfitEstablishment_Female_PaidWorker
+
+  # US citizens by naturalization.
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S0504_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S0504
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthAfrica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthCaribbean
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthCentralAmericaExceptMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthMexico
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthLatinAmerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthNorthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthNorthernWesternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthOceania
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthSouthCentralAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthSouthEasternAsia
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthSouthamerica
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthSouthernEasternEurope
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn_PlaceOfBirthWesternAsia
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_USCitizenByNaturalization
+          source_sv_ids:
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990OrEarlier_ForeignBorn
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrEarlier_ForeignBorn
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000To2009_ForeignBorn
+            - Count_Person_USCitizenByNaturalization_DateOfEntry1990To1999_ForeignBorn
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn
+            - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn
+
+  # Housing units by home value.
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_HousingUnit_HomeValue_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # Census ACS 5 year, Count_HousingUnit_HomeValue
+        - ancestor_sv_id: Count_HousingUnit_HomeValueUpto49999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValueUpto10000USDollar
+            - Count_HousingUnit_HomeValue10000To14999USDollar
+            - Count_HousingUnit_HomeValue15000To19999USDollar
+            - Count_HousingUnit_HomeValue20000To24999USDollar
+            - Count_HousingUnit_HomeValue25000To29999USDollar
+            - Count_HousingUnit_HomeValue30000To34999USDollar
+            - Count_HousingUnit_HomeValue35000To39999USDollar
+            - Count_HousingUnit_HomeValue40000To49999USDollar
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_HousingUnit_HomeValue50000To99999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValue50000To59999USDollar
+            - Count_HousingUnit_HomeValue60000To69999USDollar
+            - Count_HousingUnit_HomeValue70000To79999USDollar
+            - Count_HousingUnit_HomeValue80000To89999USDollar
+            - Count_HousingUnit_HomeValue90000To99999USDollar
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_HousingUnit_HomeValue100000To199999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValue100000To124999USDollar
+            - Count_HousingUnit_HomeValue125000To149999USDollar
+            - Count_HousingUnit_HomeValue150000To174999USDollar
+            - Count_HousingUnit_HomeValue175000To199999USDollar
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_HousingUnit_HomeValue200000To299999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValue200000To249999USDollar
+            - Count_HousingUnit_HomeValue250000To299999USDollar
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_HousingUnit_HomeValue300000To499999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValue300000To399999USDollar
+            - Count_HousingUnit_HomeValue400000To499999USDollar
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_HousingUnit_HomeValue500000To999999USDollar
+          source_sv_ids:
+            - Count_HousingUnit_HomeValue500000To749999USDollar
+            - Count_HousingUnit_HomeValue750000To999999USDollar
+          skip_all_sources_present_check: true
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_Person_Age_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # Census ACS 5 year, Age group
+        - ancestor_sv_id: Count_Person_55To64Years
+          source_sv_ids:
+            - Count_Person_55To59Years
+            - Count_Person_60To61Years
+            - Count_Person_62To64Years
+          skip_all_sources_present_check: true
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_Income_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey
+      - CensusACS5YearSurvey_AggCountry
+    stat_var_aggregation:
+      aggregations:
+      # The following 9 aggregations are: High income (100,000 USD or More), by race.
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_AmericanIndianOrAlaskaNativeAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, American Indian or Alaska Native Alone, Not Worked Full Time
+            - dc/bdl1bpzj3klq4
+            # Population: Female, 100,000 USD or More, American Indian or Alaska Native Alone, Worked Full Time
+            - dc/8v29dc0442ljg
+            # Population: Male, 100,000 USD or More, American Indian or Alaska Native Alone, Not Worked Full Time
+            - dc/4130fr4llf992
+            # Population: Male, 100,000 USD or More, American Indian or Alaska Native Alone, Worked Full Time
+            - dc/ehgx6y0qyncl2
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_AsianAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Asian Alone, Not Worked Full Time
+            - dc/5zrn0w3nwe9e2
+            # Population: Female, 100,000 USD or More, Asian Alone, Worked Full Time
+            - dc/sk4nqnbdskvhg
+            # Population: Male, 100,000 USD or More, Asian Alone, Not Worked Full Time
+            - dc/w4xl5kcbyjmnb
+            # Population: Male, 100,000 USD or More, Asian Alone, Worked Full Time
+            - dc/68zc6qjdtxllf
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_BlackOrAfricanAmericanAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Black or African American Alone, Not Worked Full Time
+            - dc/hndxvvnzf2yr1
+            # Population: Female, 100,000 USD or More, Black or African American Alone, Worked Full Time
+            - dc/vf30v7c1t5lz5
+            # Population: Male, 100,000 USD or More, Black or African American Alone, Not Worked Full Time
+            - dc/nm3bftsv2z2w2
+            # Population: Male, 100,000 USD or More, Black or African American Alone, Worked Full Time
+            - dc/whevye7y9vg1c
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_HispanicOrLatino
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Hispanic or Latino, Not Worked Full Time
+            - dc/nwzb4v1hp2jw4
+            # Population: Female, 100,000 USD or More, Hispanic or Latino, Worked Full Time
+            - dc/w5geg8s1lmb04
+            # Population: Male, 100,000 USD or More, Hispanic or Latino, Not Worked Full Time
+            - dc/xdrmxlpjlcrrb
+            # Population: Male, 100,000 USD or More, Hispanic or Latino, Worked Full Time
+            - dc/espye40zdmhgg
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_NativeHawaiianOrOtherPacificIslanderAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Native Hawaiian or Other Pacific Islander Alone, Not Worked Full Time
+            - dc/8bwnnflm9kwbg
+            # Population: Female, 100,000 USD or More, Native Hawaiian or Other Pacific Islander Alone, Worked Full Time
+            - dc/4r5pvjszn5x03
+            # Population: Male, 100,000 USD or More, Native Hawaiian or Other Pacific Islander Alone, Not Worked Full Time
+            - dc/mx7edmkkrxvl9
+            # Population: Male, 100,000 USD or More, Native Hawaiian or Other Pacific Islander Alone, Worked Full Time
+            - dc/ce7frpwyfpx85
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_SomeOtherRaceAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Some Other Race Alone, Not Worked Full Time
+            - dc/y70ppl4rxjhjh
+            # Population: Female, 100,000 USD or More, Some Other Race Alone, Worked Full Time
+            - dc/3zq436nrf2d66
+            # Population: Male, 100,000 USD or More, Some Other Race Alone, Not Worked Full Time
+            - dc/pgrfnkfnswr5d
+            # Population: Male, 100,000 USD or More, Some Other Race Alone, Worked Full Time
+            - dc/cmnkc5fpvpfk9
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_TwoOrMoreRaces
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, Two or More Races, Not Worked Full Time
+            - dc/8lffdmtgqv269
+            # Population: Female, 100,000 USD or More, Two or More Races, Worked Full Time
+            - dc/n4z306vm3zdm6
+            # Population: Male, 100,000 USD or More, Two or More Races, Not Worked Full Time
+            - dc/yr2g2ct310d43
+            # Population: Male, 100,000 USD or More, Two or More Races, Worked Full Time
+            - dc/em5ysxsf96xb8
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_WhiteAlone
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, White Alone, Not Worked Full Time
+            - dc/kd7qcdzs2q8p4
+            # Population: Female, 100,000 USD or More, White Alone, Worked Full Time
+            - dc/gjkbs2qeb9m0g
+            # Population: Male, 100,000 USD or More, White Alone, Not Worked Full Time
+            - dc/dhjfv19drrr9
+            # Population: Male, 100,000 USD or More, White Alone, Worked Full Time
+            - dc/z76nk40e6qtn8
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_WhiteAloneNotHispanicOrLatino
+          source_sv_ids:
+            # Population: Female, 100,000 USD or More, White Alone Not Hispanic or Latino, Not Worked Full Time
+            - dc/elr21hp41d4jh
+            # Population: Female, 100,000 USD or More, White Alone Not Hispanic or Latino, Worked Full Time
+            - dc/cqc4zh1dg8s6d
+            # Population: Male, 100,000 USD or More, White Alone Not Hispanic or Latino, Not Worked Full Time
+            - dc/7g2mm704kfq63
+            # Population: Male, 100,000 USD or More, White Alone Not Hispanic or Latino, Worked Full Time
+            - dc/ekh1g39v9sgj4
+
+      # The following 9 aggregations are: Low income (10,000 - 12,499 USD), by race.
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_AmericanIndianOrAlaskaNativeAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, American Indian or Alaska Native Alone, Not Worked Full Time
+            - dc/g1lqz36qc6x26
+            # Population: Female, 10,000 - 12,499 USD, American Indian or Alaska Native Alone, Worked Full Time
+            - dc/nfj1c6f5xvjpf
+            # Population: Male, 10,000 - 12,499 USD, American Indian or Alaska Native Alone, Not Worked Full Time
+            - dc/1cytg38zh4kmb
+            # Population: Male, 10,000 - 12,499 USD, American Indian or Alaska Native Alone, Worked Full Time
+            - dc/kvpkg8xd66q1d
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_AsianAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Asian Alone, Not Worked Full Time
+            - dc/rkylr8pzrhvrb
+            # Population: Female, 10,000 - 12,499 USD, Asian Alone, Worked Full Time
+            - dc/ecz488knjvddf
+            # Population: Male, 10,000 - 12,499 USD, Asian Alone, Not Worked Full Time
+            - dc/l481yty608g4
+            # Population: Male, 10,000 - 12,499 USD, Asian Alone, Worked Full Time
+            - dc/f6psb62vxhcgb
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_BlackOrAfricanAmericanAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Black or African American Alone, Not Worked Full Time
+            - dc/zvn4y702wsxe3
+            # Population: Female, 10,000 - 12,499 USD, Black or African American Alone, Worked Full Time
+            - dc/6jgshc72wscd7
+            # Population: Male, 10,000 - 12,499 USD, Black or African American Alone, Not Worked Full Time
+            - dc/h255gx042f7y6
+            # Population: Male, 10,000 - 12,499 USD, Black or African American Alone, Worked Full Time
+            - dc/pd0td6p7wbgvc
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_HispanicOrLatino
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Hispanic or Latino, Not Worked Full Time
+            - dc/p5ewnb0xtrrp
+            # Population: Female, 10,000 - 12,499 USD, Hispanic or Latino, Worked Full Time
+            - dc/7ev9gs9ppxbq2
+            # Population: Male, 10,000 - 12,499 USD, Hispanic or Latino, Not Worked Full Time
+            - dc/t5d500j7754qb
+            # Population: Male, 10,000 - 12,499 USD, Hispanic or Latino, Worked Full Time
+            - dc/x2p3338cck7vg
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_NativeHawaiianOrOtherPacificIslanderAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Native Hawaiian or Other Pacific Islander Alone, Not Worked Full Time
+            - dc/xbq05lkgsx8b6
+            # Population: Female, 10,000 - 12,499 USD, Native Hawaiian or Other Pacific Islander Alone, Worked Full Time
+            - dc/nxqjjpwkkxpx4
+            # Population: Male, 10,000 - 12,499 USD, Native Hawaiian or Other Pacific Islander Alone, Not Worked Full Time
+            - dc/bd5nldln2dxng
+            # Population: Male, 10,000 - 12,499 USD, Native Hawaiian or Other Pacific Islander Alone, Worked Full Time
+            - dc/rgme2je6kbj27
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_SomeOtherRaceAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Some Other Race Alone, Not Worked Full Time
+            - dc/8ws12v06y0sqh
+            # Population: Female, 10,000 - 12,499 USD, Some Other Race Alone, Worked Full Time
+            - dc/43pxcsbgjksmg
+            # Population: Male, 10,000 - 12,499 USD, Some Other Race Alone, Not Worked Full Time
+            - dc/lj0q1zj5d8df
+            # Population: Male, 10,000 - 12,499 USD, Some Other Race Alone, Worked Full Time
+            - dc/sy61mbcgrsz73
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_TwoOrMoreRaces
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, Two or More Races, Not Worked Full Time
+            - dc/see1v777j4807
+            # Population: Female, 10,000 - 12,499 USD, Two or More Races, Worked Full Time
+            - dc/422zpgm23b2m7
+            # Population: Male, 10,000 - 12,499 USD, Two or More Races, Not Worked Full Time
+            - dc/6k4k59zwbgw92
+            # Population: Male, 10,000 - 12,499 USD, Two or More Races, Worked Full Time
+            - dc/qdrpgr3zby8k
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_WhiteAlone
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, White Alone, Not Worked Full Time
+            - dc/c42c6rmszjdv7
+            # Population: Female, 10,000 - 12,499 USD, White Alone, Worked Full Time
+            - dc/bpzm3y2rxt7y9
+            # Population: Male, 10,000 - 12,499 USD, White Alone, Not Worked Full Time
+            - dc/kv7x6r2cqyxcf
+            # Population: Male, 10,000 - 12,499 USD, White Alone, Worked Full Time
+            - dc/xzg5t6eq4pnph
+        - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_WhiteAloneNotHispanicOrLatino
+          source_sv_ids:
+            # Population: Female, 10,000 - 12,499 USD, White Alone Not Hispanic or Latino, Not Worked Full Time
+            - dc/t6b45v6bsdf1
+            # Population: Female, 10,000 - 12,499 USD, White Alone Not Hispanic or Latino, Worked Full Time
+            - dc/j8dgk3864jpbd
+            # Population: Male, 10,000 - 12,499 USD, White Alone Not Hispanic or Latino, Not Worked Full Time
+            - dc/bv5sd8hrhs82h
+            # Population: Male, 10,000 - 12,499 USD, White Alone Not Hispanic or Latino, Worked Full Time
+            - dc/5jp07brw3g26h
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: IndiaNSS_HealthAilments_StatVarAgg
+    input_imports:
+      - IndiaNSS_HealthAilments
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionBloodDisease_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionBloodDisease_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionCancer_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionCancer_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionCancer_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionCardiovascularDisease_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionEarCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionEarCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionEndocrineMetabolicCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionEyeCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionEyeCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionGastroIntestinalCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionGenitoUrinaryCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionInfection_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionInfection_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionInfection_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionInjury_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionInjury_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionInjury_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionMusculoSkeletalCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionNSSHealthOtherCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionObstetricCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionObstetricCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionPsychiatricNeurologicalCondition_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionRespiratoryDisease_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_MedicalConditionIncident_ConditionSkinDisease_AsAFractionOf_Count_Person
+          source_sv_ids:
+            - Count_MedicalConditionIncident_0To4Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Female_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_0To4Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_15To29Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_30To44Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_45To59Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_5To14Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60OrMoreYears_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_60To69Years_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+            - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
+          skip_all_sources_present_check: true
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: OECDRegionalDemography_Person_Age_StatVarAgg
+    input_imports:
+      - OECDRegionalDemography_Population
+    stat_var_aggregation:
+      aggregations:
+      # OECD, Age group
+        - ancestor_sv_id: Count_Person_Upto9Years
+          source_sv_ids:
+            - Count_Person_Upto4Years
+            - Count_Person_5To9Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_10To19Years
+          source_sv_ids:
+            - Count_Person_10To14Years
+            - Count_Person_15To19Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_20To29Years
+          source_sv_ids:
+            - Count_Person_20To24Years
+            - Count_Person_25To29Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_30To39Years
+          source_sv_ids:
+            - Count_Person_30To34Years
+            - Count_Person_35To39Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_40To49Years
+          source_sv_ids:
+            - Count_Person_40To44Years
+            - Count_Person_45To49Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_50To59Years
+          source_sv_ids:
+            - Count_Person_50To54Years
+            - Count_Person_55To59Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_60To69Years
+          source_sv_ids:
+            - Count_Person_60To64Years
+            - Count_Person_65To69Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_70OrMoreYears
+          source_sv_ids:
+            - Count_Person_70To74Years
+            - Count_Person_75To79Years
+            - Count_Person_80OrMoreYears
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_5To19Years
+          source_sv_ids:
+            - Count_Person_5To9Years
+            - Count_Person_10To14Years
+            - Count_Person_15To19Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_25To34Years
+          source_sv_ids:
+            - Count_Person_25To29Years
+            - Count_Person_30To34Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_35To44Years
+          source_sv_ids:
+            - Count_Person_35To39Years
+            - Count_Person_40To44Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_45To54Years
+          source_sv_ids:
+            - Count_Person_45To49Years
+            - Count_Person_50To54Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_55To64Years
+          source_sv_ids:
+            - Count_Person_55To59Years
+            - Count_Person_60To64Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_65To74Years
+          source_sv_ids:
+            - Count_Person_65To69Years
+            - Count_Person_70To74Years
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_75OrMoreYears
+          source_sv_ids:
+            - Count_Person_75To79Years
+            - Count_Person_80OrMoreYears
+          skip_all_sources_present_check: true
+
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S0801_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S0801
+    stat_var_aggregation:
+      aggregations:
+      # Census ACS 5 year, Work Commute group
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed
+          source_sv_ids:
+            - Count_Person_Years16Onwards_CarTruckOrVan_WorkCommute_Employed
+            - Count_Person_Years16Onwards_DroveAlone_WorkCommute_Employed
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_WorkCommute_Employed
+            - Count_Person_Years16Onwards_CarpoolOccupancy2Person_CarTruckOrVanCarpooled_WorkCommute_Employed
+            - Count_Person_Years16Onwards_CarpoolOccupancy3Person_CarTruckOrVanCarpooled_WorkCommute_Employed
+            - Count_Person_Years16Onwards_CarpoolOccupancy4OrMorePerson_CarTruckOrVanCarpooled_WorkCommute_Employed
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_WorkCommute_Employed
+            - Count_Person_Years16Onwards_Walked_WorkCommute_Employed
+            - Count_Person_Years16Onwards_Bicycle_WorkCommute_Employed
+            - Count_Person_Years16Onwards_TaxicabMotorcycleBicycleOrOtherMeans_WorkCommute_Employed
+            - Count_Person_Years16Onwards_WorkedAtHome_WorkCommute_Employed
+            - Count_Person_Years16Onwards_WorkedFromHome_WorkCommute_Employed
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_Male
+          source_sv_ids:
+            - Count_Person_Years16Onwards_CarTruckOrVan_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_DroveAlone_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_CarpoolOccupancy2Person_CarTruckOrVanCarpooled_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_CarpoolOccupancy3Person_CarTruckOrVanCarpooled_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_CarpoolOccupancy4OrMorePerson_CarTruckOrVanCarpooled_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_Walked_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_Bicycle_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_TaxicabMotorcycleBicycleOrOtherMeans_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_WorkedAtHome_WorkCommute_Employed_Male
+            - Count_Person_Years16Onwards_WorkedFromHome_WorkCommute_Employed_Male
+          skip_all_sources_present_check: true
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_Female
+          source_sv_ids:
+            - Count_Person_Years16Onwards_CarTruckOrVan_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_DroveAlone_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_CarpoolOccupancy2Person_CarTruckOrVanCarpooled_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_CarpoolOccupancy3Person_CarTruckOrVanCarpooled_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_CarpoolOccupancy4OrMorePerson_CarTruckOrVanCarpooled_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_Walked_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_Bicycle_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_TaxicabMotorcycleBicycleOrOtherMeans_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_WorkedAtHome_WorkCommute_Employed_Female
+            - Count_Person_Years16Onwards_WorkedFromHome_WorkCommute_Employed_Female
+          skip_all_sources_present_check: true
+      # Worked outside of Home
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_StartTimeHour0000To0459_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0500To0529_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0530To0559_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0600To0629_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0630To0659_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0700To0729_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0730To0759_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0800To0829_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0830To0859_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0900To2359_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10OrLessMinute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10To14Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_15To19Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_20To24Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_25To29Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_30To34Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_35To44Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_45To59Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_Male_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_StartTimeHour0000To0459_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0500To0529_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0530To0559_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0600To0629_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0630To0659_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0700To0729_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0730To0759_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0800To0829_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0830To0859_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0900To2359_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10OrLessMinute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10To14Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_15To19Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_20To24Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_25To29Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_30To34Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_35To44Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_45To59Minute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_60OrMoreMinute_WorkCommute_Employed_Male_WorkedOutsideOfHome
+        - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_Female_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_StartTimeHour0000To0459_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0500To0529_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0530To0559_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0600To0629_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0630To0659_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0700To0729_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0730To0759_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0800To0829_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0830To0859_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_StartTimeHour0900To2359_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10OrLessMinute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_10To14Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_15To19Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_20To24Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_25To29Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_30To34Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_35To44Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_45To59Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_60OrMoreMinute_WorkCommute_Employed_Female_WorkedOutsideOfHome
+  - type: STAT_VAR_AGGREGATION
+    output_import: CensusACS5YearSurvey_SubjectTables_S0804_StatVarAgg
+    input_imports:
+      - CensusACS5YearSurvey_SubjectTables_S0804
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_WorkCommute_Employed_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_10OrLessMinute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_10To14Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_15To19Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_20To24Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_25To29Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_30To34Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_35To44Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_45To59Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
+        - ancestor_sv_id: Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_WorkCommute_Employed_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_10OrLessMinute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_10To14Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_15To19Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_20To24Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_25To29Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_30To34Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_35To44Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_45To59Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanDroveAlone_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
+        - ancestor_sv_id: Count_Person_Years16Onwards_CarTruckOrVanCarpooled_WorkCommute_Employed_WorkedOutsideOfHome
+          source_sv_ids:
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_10OrLessMinute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_10To14Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_15To19Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_25To29Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_20To24Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_30To34Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_35To44Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_45To59Minute_WorkCommute_Employed_WorkedOutsideOfHome
+            - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
+  - type: STAT_VAR_AGGREGATION
+    output_import: India_RBIStateDomesticProduct_StatVarAgg
+    input_imports:
+      - India_RBIStateDomesticProduct
+    stat_var_aggregation:
+      aggregations:
+        - ancestor_sv_id: Nominal_Amount_EconomicActivity_GrossValueAdded
+          source_sv_ids:
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_Agriculture
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_BankingAndInsuranceSector
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_Construction
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_Industry
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_ManufacturingSector
+            - Nominal_Amount_EconomicActivity_GrossValueAdded_Services
+        - ancestor_sv_id: Nominal_Amount_EconomicActivity_NetValueAdded
+          source_sv_ids:
+            - Nominal_Amount_EconomicActivity_NetValueAdded_Agriculture
+            - Nominal_Amount_EconomicActivity_NetValueAdded_Construction
+            - Nominal_Amount_EconomicActivity_NetValueAdded_Industry
+            - Nominal_Amount_EconomicActivity_NetValueAdded_ManufacturingSector
+            - Nominal_Amount_EconomicActivity_NetValueAdded_Services
+        - ancestor_sv_id: Amount_EconomicActivity_GrossValueAdded_RealValue
+          source_sv_ids:
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_Agriculture
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_BankingAndInsuranceSector
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_Construction
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_Industry
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_ManufacturingSector
+            - RealValue_Amount_EconomicActivity_GrossValueAdded_Services
+        - ancestor_sv_id: RealValue_Amount_EconomicActivity_NetValueAdded
+          source_sv_ids:
+            - RealValue_Amount_EconomicActivity_NetValueAdded_Agriculture
+            - RealValue_Amount_EconomicActivity_NetValueAdded_Construction
+            - RealValue_Amount_EconomicActivity_NetValueAdded_Industry
+            - RealValue_Amount_EconomicActivity_NetValueAdded_ManufacturingSector
+            - RealValue_Amount_EconomicActivity_NetValueAdded_Services
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
new file mode 100644
index 000000000..5fd320fb9
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
@@ -0,0 +1,324 @@
+calculations:
+
+# Energy.
+  - type: STAT_VAR_CALCULATION
+    input_imports:
+      - EPA_GHGRP_AggCounty
+      - EPA_GHGRP_AggCensusZipCodeTabulationArea
+      - EPA_GHGRP_AggCounty_AggState
+      - EIA_Electricity
+      - UNEnergy
+    output_import: Energy_StatVarCalculation
+    stat_var_calculation:
+      calculations:
+        - input1:
+            sv_regex: Annual_Emissions_GreenhouseGas_NonBiogenic
+            measurement_method_regex: dcAggregate/EPA_GHGRP
+            facet_info:
+              unit: MetricTonCO2e
+              observation_period: P1Y
+          input2:
+            sv_regex: Annual_Generation_Electricity
+            facet_info:
+              unit: GigawattHour
+        operation: DIVIDE
+          output:
+            sv: Annual_Emissions_GreenhouseGas_NonBiogenic_Per_Annual_Generation_Electricity
+            measurement_method: EPA_GHGRP_EIA_Electricity
+            facet_info:
+              unit: MetricTonCO2ePerGigawattHour
+              observation_period: P1Y
+
+# Climate: differences between modeled and observed temperature.
+# There are 30+ models. For each model, the output diffs are computed for the
+# Mean, Min and Max aggregations, and also for SSP2 with RCP4.5 and SSP5 with
+# RCP8.5 (for each model).
+# Diffs are computed for the observation periods P1M, P1Y, P5Y and P10Y.
+# Note that the computations below use sv_regex and measurement_method_regex
+# instead of sv and measurement_method to allow more expressive matching. Also
+# note that the outputs below use sv_prefix and measurement_method_prefix,
+# unlike the Energy calculation above. Whenever sv and measurement_method are
+# set in the output, they are preferred. Otherwise, the *_prefix is used.
+  - type: STAT_VAR_CALCULATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_Subnational
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50
+      - NOAA_EPA_Observed_Historical_Weather
+      - NASA_NEXDCP30
+      - Copernicus_ECMWF_ERA5_Monthly
+      - Copernicus_ECMWF_ERA5_Monthly_AggrYearsStats
+      - NASA_NEXDCP30_AggrYearsStats
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrYearsStats
+      - NASA_NEXGDDP_CMIP6_Subnational_AggrYearsStats
+      - NOAA_EPA_Observed_Historical_Weather_AggrYearsStats
+    output_import: Climate_StatVarCalculation
+    stat_var_calculation:
+      calculations:
+        - input1:
+            sv_regex: "^Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+          input2:
+            sv_regex: ^Mean_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+
+      calculations:
+        - input1:
+            sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+          input2:
+            sv_regex: ^Min_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+
+      calculations:
+        - input1:
+            sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+          input2:
+            sv_regex: ^Max_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1M
+
+      calculations:
+        - input1:
+            sv_regex: "^Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Mean_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Min_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Max_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P1Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Mean_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Min_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Max_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P5Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Mean_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Min_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+
+      calculations:
+        - input1:
+            sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
+            measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+          input2:
+            sv_regex: ^Max_Temperature$
+            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
+              import_name_regex: ".*AggrYearsStats$"
+        operation: SUBTRACT
+          output:
+            sv_prefix: DifferenceRelativeToObservationalData_
+            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            facet_info:
+              unit: Celsius
+              observation_period: P10Y
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
new file mode 100644
index 000000000..97c7da2e2
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -0,0 +1,449 @@
+calculations:
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXDCP30
+    round: 1
+    output_import: NASA_NEXDCP30_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+          # TODO(b/220070731): Consider adding start_date/end_date spec.
+            date_specs:
+            # There are two series in this import:
+            # 1. Monthly historical data from 1950 to 2005.
+            # 2. Monthly projections from 2006 to 2099.
+            dates:
+              - 1990
+              - 2006
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_Subnational
+    round: 1
+    output_import: NASA_NEXGDDP_Subnational_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+            date_specs:
+            start_date: 2006
+            end_date: 2020
+            date_specs:
+            # There are two series in this import:
+            # 1. Monthly historical data from 1950 to 2005.
+            # 2. Monthly projections from 2006 to 2099.
+            dates:
+              - 1990
+              - 2006
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_Country
+    round: 1
+    output_import: NASA_NEXGDDP_Country_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+          # TODO(b/220070731): Consider adding start_date/end_date spec.
+            date_specs:
+            # There are two series in this import:
+            # 1. Monthly historical data from 1950 to 2005.
+            # 2. Monthly projections from 2006 to 2099.
+            dates:
+              - 1990
+              - 2006
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_Subnational
+    round: 1
+    output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+            date_specs:
+            start_date: 2015
+            end_date: 2020
+            date_specs:
+            # There are four series in this import:
+            # 1. Monthly historical data from 1950 to 2014.
+            # 2. Yearly historical data from 1950 to 2014.
+            # 3. Monthly projections from 2015 to 2100.
+            # 4. Yearly projections from 2015 to 2100.
+            dates:
+              - 1990
+              - 2015
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50
+    round: 1
+    output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+            date_specs:
+            start_date: 2015
+            end_date: 2020
+            date_specs:
+            # There are four series in this import:
+            # 1. Monthly historical data from 1950 to 2014.
+            # 2. Yearly historical data from 1950 to 2014.
+            # 3. Monthly projections from 2015 to 2100.
+            # 4. Yearly projections from 2015 to 2100.
+            dates:
+              - 1990
+              - 2015
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_Subnational_AggrDiffStats
+    round: 2
+    output_import: NASA_NEXGDDP_Subnational_AggrStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - stats_across_models:
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
+    round: 2
+    output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - stats_across_models:
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
+    round: 2
+    output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - stats_across_models:
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
+    round: 3
+    output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrOverTimeOnStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        # Config for future 10-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 30-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 80-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 10-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 30-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 80-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_Subnational_AggrStatsAcrossModels
+    round: 3
+    output_import: NASA_NEXGDDP_Subnational_AggrOverTimeOnStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        # Config for future 10-year max/min aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+        # Config for future 30-year max/min aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+        # Config for future 80-year max/min aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+        # Config for future 10-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 30-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 80-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
+    round: 3
+    output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrOverTimeOnStatsAcrossModels
+    stat_var_series_aggregation:
+      aggr_funcs:
+        # Config for future 10-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 30-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 80-year max/min/mean aggregate
+        - aggr_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            aggregation_op: OPERATOR_MAX
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            aggregation_op: OPERATOR_MIN
+            sv_configs
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+            aggregation_op: OPERATOR_MEAN
+        # Config for future 10-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P10Y
+            output_obs_date: 2030
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 30-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P30Y
+            output_obs_date: 2050
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+        # Config for future 80-year period of num-months with max/min temp above/below threshold
+        - count_threshold_exception_over_time
+            time_range
+            input_obs_period: P1M
+            output_obs_period: P80Y
+            output_obs_date: 2100
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+            threshold_value: 5
+            unit: Celsius
+            comparison: OPERATOR_GE
+            output_cprop: maxTemperature
+            thresholds
+            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+            threshold_value: -5
+            unit: Celsius
+            comparison: OPERATOR_LE
+            output_cprop: minTemperature
+  - type: STAT_VAR_SERIES_AGGREGATION
+    input_imports:
+      - NASA_NEXGDDP_IpccPlaces50
+    round: 1
+    output_import: NASA_NEXGDDP_IpccPlaces50_AggrDiffStats
+    stat_var_series_aggregation:
+      aggr_funcs:
+        - max_diff_across_measurement_methods:
+        - diff_relative_to_base_date:
+            date_specs:
+            start_date: 2015
+            end_date: 2020
+            date_specs:
+            # There are two series in this import:
+            # 1. Monthly historical data from 1950 to 2005.
+            # 2. Monthly projections from 2006 to 2099.
+            dates:
+              - 1990
+              - 2006
+
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml
new file mode 100644
index 000000000..661d51dcf
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml
@@ -0,0 +1,25 @@
+calculations:
+  - type: SUPER_ENUM_AGGREGATION
+    input_imports:
+      - CensusACS5YearSurvey
+    output_import: CensusACS5YearSurvey_SuperEnum
+
+  - type: SUPER_ENUM_AGGREGATION
+    input_imports:
+      - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_AggCountry_SuperEnum
+
+  - type: SUPER_ENUM_AGGREGATION
+    input_imports:
+      - NCES_PrivateSchoolStats
+    output_import: NCES_PrivateSchoolStats_SuperEnum
+
+  - type: SUPER_ENUM_AGGREGATION
+    input_imports:
+      - NCES_PublicSchoolStats
+    output_import: NCES_PublicSchoolStats_SuperEnum
+
+  - type: SUPER_ENUM_AGGREGATION
+    input_imports:
+      - NCES_SchoolDistrictStats
+    output_import: NCES_SchoolDistrictStats_SuperEnum

From 481a4694474e883fe1fe262d18d6553754696cbb Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 15:29:00 +0530
Subject: [PATCH 18/33] style(aggregation): remove internal links and bug
 references from YAML configs

---
 .../aggregation/configs/statvar.yaml                | 13 -------------
 .../aggregation/configs/statvar_series.yaml         |  2 --
 2 files changed, 15 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
index fe6743c5b..6f2896c6e 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -16,7 +16,6 @@ calculations:
       #    We use that to compute total and by disability-status.
       #
       # Total uninsured
-      # https://screenshot.googleplex.com/v4s43nzoCC9G5Qp
         - ancestor_sv_id: Count_Person_NoHealthInsurance
           source_sv_ids:
             # No Disability
@@ -28,7 +27,6 @@ calculations:
             - dc/96dqj47csvmy8
             - dc/qr4s77egv27q2
       # Total insured
-      # https://screenshot.googleplex.com/ARqaVprXtizwETy
         - ancestor_sv_id: Count_Person_WithHealthInsurance
           source_sv_ids:
             # No Disability
@@ -73,7 +71,6 @@ calculations:
       #    breakdown (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B27001). We
       #    use that to compute gender breakdown.
       #
-      # https://screenshot.googleplex.com/aSP3i8ASJDxct2C
         - ancestor_sv_id: Count_Person_Female_NoHealthInsurance
           source_sv_ids:
             - dc/jx2q10tbnwhf3
@@ -85,7 +82,6 @@ calculations:
             - dc/jydkzj9v0yb2
             - dc/qb6th3c5tcd92
             - dc/zj152fzswgrw6
-      # https://screenshot.googleplex.com/57jf9CpaC7KGd7y
         - ancestor_sv_id: Count_Person_Male_NoHealthInsurance
           source_sv_ids:
             - dc/fwqlkjjch0dg5
@@ -97,7 +93,6 @@ calculations:
             - dc/nn4xz2lv98h5c
             - dc/ywjqcxc244sx2
             - dc/j88b1fy7045j1
-      # https://screenshot.googleplex.com/5buzauZqyhTdnkV
         - ancestor_sv_id: Count_Person_Female_WithHealthInsurance
           source_sv_ids:
             - dc/jjhxlkp3qk2m6
@@ -109,7 +104,6 @@ calculations:
             - dc/y9xxbzfr8yb06
             - dc/whh1y53fj4p69
             - dc/965mdb0c8tef9
-      # https://screenshot.googleplex.com/38JDwJJGKhNLmWM
         - ancestor_sv_id: Count_Person_Male_WithHealthInsurance
           source_sv_ids:
             - dc/ve6s1v1eyrbt7
@@ -220,7 +214,6 @@ calculations:
       - CensusACS5YearSurvey_AggCountry
     stat_var_aggregation:
       aggregations:
-      # https://screenshot.googleplex.com/3Fw42DhNnD5fPLi
         - ancestor_sv_id: Count_Person_SpeakEnglishNotAtAll
           source_sv_ids:
             - dc/edfjy64gmxf6f
@@ -231,7 +224,6 @@ calculations:
             - dc/6yzk8t4e9t6v6
             - dc/0kx32ff9c6d79
             - dc/7cxlkzf56zk26
-      # https://screenshot.googleplex.com/3ExaNScXGyyEP2c
         - ancestor_sv_id: Count_Person_SpeakEnglishNotWell
           source_sv_ids:
             - dc/f8qh4hp1830dg
@@ -242,7 +234,6 @@ calculations:
             - dc/6rzmvxpgqlww6
             - dc/wf1rm5zx34dtg
             - dc/4xykjw3v6n4t3
-      # https://screenshot.googleplex.com/AaaSNUMpQwHRvSZ
         - ancestor_sv_id: Count_Person_SpeakEnglishWell
           source_sv_ids:
             - dc/4n1wg2c7hjem
@@ -271,7 +262,6 @@ calculations:
     stat_var_aggregation:
       aggregations:
       # Women in armed forces
-      # https://screenshot.googleplex.com/3ik3W3g2eqMjkSm
         - ancestor_sv_id: Count_Person_Female_InArmedForces
           source_sv_ids:
             - dc/173smewzddlb
@@ -285,7 +275,6 @@ calculations:
             - dc/lb3lb4mg82mph
             - dc/g2m31qc7q1x64
       # Men in armed forces
-      # https://screenshot.googleplex.com/6dvhYcbmtHsjifQ
         - ancestor_sv_id: Count_Person_Male_InArmedForces
           source_sv_ids:
             - dc/vp1gqv00d2ql3
@@ -694,7 +683,6 @@ calculations:
 #     stat_var_aggregation:
 #       aggregations:
       # # Women Employed
-      # # https://screenshot.googleplex.com/AYFWvJqXa3EB2eU
       #
       # # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
       # # don't have. Fix it before adding this to aggregation.
@@ -725,7 +713,6 @@ calculations:
             # # dc/xepldf55yq6s5
             #
             # # Men Employed
-            # # https://screenshot.googleplex.com/C24sFkxNbUpWiQJ
 #         - ancestor_sv_id: Count_Person_Male_Employed
 #           source_sv_ids:
 #             - dc/2s6hps4z1qced
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
index 97c7da2e2..3ce0fbdc4 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -8,7 +8,6 @@ calculations:
       aggr_funcs:
         - max_diff_across_measurement_methods:
         - diff_relative_to_base_date:
-          # TODO(b/220070731): Consider adding start_date/end_date spec.
             date_specs:
             # There are two series in this import:
             # 1. Monthly historical data from 1950 to 2005.
@@ -44,7 +43,6 @@ calculations:
       aggr_funcs:
         - max_diff_across_measurement_methods:
         - diff_relative_to_base_date:
-          # TODO(b/220070731): Consider adding start_date/end_date spec.
             date_specs:
             # There are two series in this import:
             # 1. Monthly historical data from 1950 to 2005.

From 832726c4af3d27393fb3759e68e1b04fcba4873e Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 15:54:20 +0530
Subject: [PATCH 19/33] feat(aggregation): update schema.json and validator.py
 for YAML calculations configs

---
 .../aggregation/configs/place.yaml            |  51 +-
 .../configs/statvar_calculation.yaml          | 133 ++--
 .../aggregation/configs/statvar_series.yaml   | 587 +++++++-----------
 .../ingestion-helper/aggregation/schema.json  |  98 +--
 .../ingestion-helper/aggregation/validator.py |  83 ++-
 5 files changed, 404 insertions(+), 548 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
index 6d1108f73..892a2b128 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
@@ -1,6 +1,7 @@
 calculations:
   - type: PLACE_AGGREGATION
-    input_imports: CensusACS5YearSurvey
+    input_imports:
+      - CensusACS5YearSurvey
     place_aggregation:
       from_place_types: State
       to_place_types: Country
@@ -8,7 +9,8 @@ calculations:
     output_import: CensusACS5YearSurvey_AggCountry
 
   - type: PLACE_AGGREGATION
-    input_imports: CensusSAHIE
+    input_imports:
+      - CensusSAHIE
     place_aggregation:
       from_place_types: State
       to_place_types: Country
@@ -16,7 +18,8 @@ calculations:
     output_import: CensusSAHIE_AggCountry
 
   - type: PLACE_AGGREGATION
-    input_imports: CDCMortality
+    input_imports:
+      - CDCMortality
     place_aggregation:
       from_place_types: County
       to_place_types: State
@@ -24,7 +27,8 @@ calculations:
     output_import: CDCMortality_AggState
 
   - type: PLACE_AGGREGATION
-    input_imports: CDCMortality_AggState
+    input_imports:
+      - CDCMortality_AggState
     place_aggregation:
       from_place_types: State
       to_place_types: Country
@@ -32,7 +36,8 @@ calculations:
     output_import: CDCMortality_AggState_AggCountry
 
   # - type: PLACE_AGGREGATION
-  #   input_imports: FBIGovCrime
+  #   input_imports:
+  #     - FBIGovCrime
   #   place_aggregation:
   #     from_place_types: State
   #     to_place_types: Country
@@ -40,7 +45,8 @@ calculations:
   #   output_import: FBIGovCrime_AggCountry
 
   - type: PLACE_AGGREGATION
-    input_imports: DEA_ARCOS
+    input_imports:
+      - DEA_ARCOS
     place_aggregation:
       from_place_types: County
       to_place_types: State
@@ -48,7 +54,8 @@ calculations:
     output_import: DEA_ARCOS_AggState
 
   - type: PLACE_AGGREGATION
-    input_imports: DEA_ARCOS_AggState
+    input_imports:
+      - DEA_ARCOS_AggState
     place_aggregation:
       from_place_types: State
       to_place_types: Country
@@ -56,7 +63,8 @@ calculations:
     output_import: DEA_ARCOS_AggState_AggCountry
 
   - type: PLACE_AGGREGATION
-    input_imports: EPA_EJSCREEN
+    input_imports:
+      - EPA_EJSCREEN
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
@@ -64,7 +72,8 @@ calculations:
     output_import: EPA_EJSCREEN_AggCensusTract
 
   - type: PLACE_AGGREGATION
-    input_imports: EPA_EJSCREEN_AggCensusTract
+    input_imports:
+      - EPA_EJSCREEN_AggCensusTract
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
@@ -72,7 +81,8 @@ calculations:
     output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
 
   - type: PLACE_AGGREGATION
-    input_imports: DeepSolar
+    input_imports:
+      - DeepSolar
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
@@ -80,7 +90,8 @@ calculations:
     output_import: DeepSolar_AggCensusTract
 
   - type: PLACE_AGGREGATION
-    input_imports: DeepSolar_AggCensusTract
+    input_imports:
+      - DeepSolar_AggCensusTract
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
@@ -88,7 +99,8 @@ calculations:
     output_import: DeepSolar_AggCensusTract_AggCounty
 
   - type: PLACE_AGGREGATION
-    input_imports: EPA_GHGRP
+    input_imports:
+      - EPA_GHGRP
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: County
@@ -96,7 +108,8 @@ calculations:
     output_import: EPA_GHGRP_AggCounty
 
   - type: PLACE_AGGREGATION
-    input_imports: EPA_GHGRP
+    input_imports:
+      - EPA_GHGRP
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: CensusZipCodeTabulationArea
@@ -104,7 +117,8 @@ calculations:
     output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
 
   - type: PLACE_AGGREGATION
-    input_imports: EPA_GHGRP_AggCounty
+    input_imports:
+      - EPA_GHGRP_AggCounty
     place_aggregation:
       from_place_types: County
       to_place_types: State
@@ -112,7 +126,8 @@ calculations:
     output_import: EPA_GHGRP_AggCounty_AggState
 
   - type: PLACE_AGGREGATION
-    input_imports: RFF_USGridGeo_WeatherVariabilityForecast
+    input_imports:
+      - RFF_USGridGeo_WeatherVariabilityForecast
     place_aggregation:
       from_place_types: GeoGridPlace_0.25Deg
       to_place_types: County
@@ -121,7 +136,8 @@ calculations:
     output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
 
   - type: PLACE_AGGREGATION
-    input_imports: India_RBIStateDomesticProduct
+    input_imports:
+      - India_RBIStateDomesticProduct
     place_aggregation:
       from_place_types: State
       to_place_types: Country
@@ -130,7 +146,8 @@ calculations:
     output_import: India_RBIStateDomesticProduct_AggCountry
 
   - type: PLACE_AGGREGATION
-    input_imports: India_RBIStateDomesticProduct_StatVarAgg
+    input_imports:
+      - India_RBIStateDomesticProduct_StatVarAgg
     place_aggregation:
       from_place_types: State
       to_place_types: Country
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
index 5fd320fb9..12408e433 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
@@ -21,7 +21,7 @@ calculations:
             sv_regex: Annual_Generation_Electricity
             facet_info:
               unit: GigawattHour
-        operation: DIVIDE
+          operation: DIVIDE
           output:
             sv: Annual_Emissions_GreenhouseGas_NonBiogenic_Per_Annual_Generation_Electricity
             measurement_method: EPA_GHGRP_EIA_Electricity
@@ -61,20 +61,19 @@ calculations:
               unit: Celsius
               observation_period: P1M
           input2:
-            sv_regex: ^Mean_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Mean_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1M
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1M
 
-      calculations:
         - input1:
             sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -82,20 +81,19 @@ calculations:
               unit: Celsius
               observation_period: P1M
           input2:
-            sv_regex: ^Min_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Min_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1M
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1M
 
-      calculations:
         - input1:
             sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -103,20 +101,19 @@ calculations:
               unit: Celsius
               observation_period: P1M
           input2:
-            sv_regex: ^Max_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Max_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1M
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1M
 
-      calculations:
         - input1:
             sv_regex: "^Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -125,21 +122,20 @@ calculations:
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Mean_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Mean_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1Y
 
-      calculations:
         - input1:
             sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -148,21 +144,20 @@ calculations:
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Min_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Min_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1Y
 
-      calculations:
         - input1:
             sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -171,21 +166,20 @@ calculations:
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Max_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Max_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P1Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P1Y
 
-      calculations:
         - input1:
             sv_regex: "^Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -194,21 +188,20 @@ calculations:
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Mean_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Mean_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P5Y
 
-      calculations:
         - input1:
             sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -217,21 +210,20 @@ calculations:
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Min_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Min_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P5Y
 
-      calculations:
         - input1:
             sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -240,21 +232,20 @@ calculations:
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Max_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Max_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P5Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P5Y
 
-      calculations:
         - input1:
             sv_regex: "^Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -263,21 +254,20 @@ calculations:
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Mean_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Mean_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P10Y
 
-      calculations:
         - input1:
             sv_regex: "^Min_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -286,21 +276,20 @@ calculations:
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Min_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Min_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P10Y
 
-      calculations:
         - input1:
             sv_regex: "^Max_Temperature(_SSP[0-9]+)*$"
             measurement_method_regex: "^dcAggregate/NASA_Mean_CMIP6_.*"
@@ -309,16 +298,16 @@ calculations:
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
           input2:
-            sv_regex: ^Max_Temperature$
-            measurement_method_regex: dcAggregate/NASAGSOD_NASAGHCN_EPA
+            sv_regex: "^Max_Temperature$"
+            measurement_method_regex: "dcAggregate/NASAGSOD_NASAGHCN_EPA"
             facet_info:
               unit: Celsius
               observation_period: P10Y
               import_name_regex: ".*AggrYearsStats$"
-        operation: SUBTRACT
+          operation: SUBTRACT
           output:
-            sv_prefix: DifferenceRelativeToObservationalData_
-            measurement_method_prefix: dcAggregate/NASA_Mean_CMIP6_WithBaseAs_
+            sv_prefix: "DifferenceRelativeToObservationalData_"
+            measurement_method_prefix: "dcAggregate/NASA_Mean_CMIP6_WithBaseAs_"
             facet_info:
               unit: Celsius
               observation_period: P10Y
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
index 3ce0fbdc4..4359a8f93 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -6,15 +6,13 @@ calculations:
     output_import: NASA_NEXDCP30_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            # There are two series in this import:
-            # 1. Monthly historical data from 1950 to 2005.
-            # 2. Monthly projections from 2006 to 2099.
-            dates:
-              - 1990
-              - 2006
+              - dates:
+                  - "1990"
+                  - "2006"
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Subnational
@@ -22,18 +20,15 @@ calculations:
     output_import: NASA_NEXGDDP_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            start_date: 2006
-            end_date: 2020
-            date_specs:
-            # There are two series in this import:
-            # 1. Monthly historical data from 1950 to 2005.
-            # 2. Monthly projections from 2006 to 2099.
-            dates:
-              - 1990
-              - 2006
+              - start_date: "2006"
+                end_date: "2020"
+              - dates:
+                  - "1990"
+                  - "2006"
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Country
@@ -41,15 +36,13 @@ calculations:
     output_import: NASA_NEXGDDP_Country_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            # There are two series in this import:
-            # 1. Monthly historical data from 1950 to 2005.
-            # 2. Monthly projections from 2006 to 2099.
-            dates:
-              - 1990
-              - 2006
+              - dates:
+                  - "1990"
+                  - "2006"
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
@@ -57,20 +50,15 @@ calculations:
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            start_date: 2015
-            end_date: 2020
-            date_specs:
-            # There are four series in this import:
-            # 1. Monthly historical data from 1950 to 2014.
-            # 2. Yearly historical data from 1950 to 2014
-            # 3. Monthly projections from 2015 to 2100.
-            # 4. Yearly projections from 2015 to 2100.
-            dates:
-              - 1990
-              - 2015
+              - start_date: "2015"
+                end_date: "2020"
+              - dates:
+                  - "1990"
+                  - "2015"
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
@@ -78,44 +66,43 @@ calculations:
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            start_date: 2015
-            end_date: 2020
-            date_specs:
-            # There are four series in this import:
-            # 1. Monthly historical data from 1950 to 2014.
-            # 2. Yearly historical data from 1950 to 2014
-            # 3. Monthly projections from 2015 to 2100.
-            # 4. Yearly projections from 2015 to 2100.
-            dates:
-              - 1990
-              - 2015
-  - type: STAT_VAR_SERIES_AGGREGATION
-    input_imports:
-      - NASA_NEXGDDP_Subnational_AggrDiffStats
-    round: 2
-    output_import: NASA_NEXGDDP_Subnational_AggrStatsAcrossModels
-    stat_var_series_aggregation:
-      aggr_funcs:
-        - stats_across_models:
+              - start_date: "2015"
+                end_date: "2020"
+              - dates:
+                  - "1990"
+                  - "2015"
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
-      - NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
+      - NASA_NEXGDDP_CMIP6_Subnational
     round: 2
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
-        - stats_across_models:
+        - aggr_stats_across_models:
+            sv_regex: "^DifferenceRelativeToObservationalData_.*"
+            aggregation_ops:
+              - OPERATOR_MEDIAN
+              - OPERATOR_PERCENTILE90
+              - OPERATOR_PERCENTILE10
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
-      - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
+      - NASA_NEXGDDP_CMIP6_IpccPlaces50
     round: 2
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
-        - stats_across_models:
+        - aggr_stats_across_models:
+            sv_regex: "^DifferenceRelativeToObservationalData_.*"
+            aggregation_ops:
+              - OPERATOR_MEDIAN
+              - OPERATOR_PERCENTILE90
+              - OPERATOR_PERCENTILE10
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
@@ -124,201 +111,101 @@ calculations:
     stat_var_series_aggregation:
       aggr_funcs:
         # Config for future 10-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
-        # Config for future 30-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
-        # Config for future 80-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
-        # Config for future 10-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
-        # Config for future 30-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
-        # Config for future 80-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
-  - type: STAT_VAR_SERIES_AGGREGATION
-    input_imports:
-      - NASA_NEXGDDP_Subnational_AggrStatsAcrossModels
-    round: 3
-    output_import: NASA_NEXGDDP_Subnational_AggrOverTimeOnStatsAcrossModels
-    stat_var_series_aggregation:
-      aggr_funcs:
-        # Config for future 10-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P10Y
+              output_obs_date: "2030"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 30-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P30Y
+              output_obs_date: "2050"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 80-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P80Y
+              output_obs_date: "2100"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 10-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P10Y
+              output_obs_date: "2030"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
         # Config for future 30-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P30Y
+              output_obs_date: "2050"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
         # Config for future 80-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P80Y
+              output_obs_date: "2100"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
@@ -327,104 +214,101 @@ calculations:
     stat_var_series_aggregation:
       aggr_funcs:
         # Config for future 10-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P10Y
+              output_obs_date: "2030"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 30-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P30Y
+              output_obs_date: "2050"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 80-year max/min aggregate
-        - aggr_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            aggregation_op: OPERATOR_MAX
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            aggregation_op: OPERATOR_MIN
-            sv_configs
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
-            aggregation_op: OPERATOR_MEAN
+        - aggr_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P80Y
+              output_obs_date: "2100"
+            sv_configs:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                aggregation_op: OPERATOR_MAX
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                aggregation_op: OPERATOR_MIN
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate((?!Max|Min).)*_Temperature.*"
+                aggregation_op: OPERATOR_MEAN
+
         # Config for future 10-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P10Y
-            output_obs_date: 2030
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P10Y
+              output_obs_date: "2030"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
         # Config for future 30-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P30Y
-            output_obs_date: 2050
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P30Y
+              output_obs_date: "2050"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
         # Config for future 80-year period of num-months with max/min temp above/below threshold
-        - count_threshold_exception_over_time
-            time_range
-            input_obs_period: P1M
-            output_obs_period: P80Y
-            output_obs_date: 2100
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
-            threshold_value: 5
-            unit: Celsius
-            comparison: OPERATOR_GE
-            output_cprop: maxTemperature
-            thresholds
-            sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
-            threshold_value: -5
-            unit: Celsius
-            comparison: OPERATOR_LE
-            output_cprop: minTemperature
+        - count_threshold_exception_over_time:
+            time_range:
+              input_obs_period: P1M
+              output_obs_period: P80Y
+              output_obs_date: "2100"
+            thresholds:
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Max_Temperature.*"
+                threshold_value: 5
+                unit: Celsius
+                comparison: OPERATOR_GE
+                output_cprop: maxTemperature
+              - sv_regex: "^(Median|Percentile90|Percentile10)AcrossModels_DifferenceRelativeToBaseDate.*_Min_Temperature.*"
+                threshold_value: -5
+                unit: Celsius
+                comparison: OPERATOR_LE
+                output_cprop: minTemperature
+
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_IpccPlaces50
@@ -432,16 +316,11 @@ calculations:
     output_import: NASA_NEXGDDP_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
-        - max_diff_across_measurement_methods:
+        - max_diff_across_measurement_methods: {}
         - diff_relative_to_base_date:
             date_specs:
-            start_date: 2015
-            end_date: 2020
-            date_specs:
-            # There are two series in this import:
-            # 1. Monthly historical data from 1950 to 2005.
-            # 2. Monthly projections from 2006 to 2099.
-            dates:
-              - 1990
-              - 2006
-
+              - start_date: "2015"
+                end_date: "2020"
+              - dates:
+                  - "1990"
+                  - "2006"
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index b0deba8f6..57805f320 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -1,93 +1,41 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
-  "title": "AggregationConfig",
+  "title": "AggregationCalculationsConfig",
   "type": "object",
+  "required": ["calculations"],
+  "additionalProperties": false,
   "properties": {
-    "aggregations": {
+    "calculations": {
       "type": "array",
       "items": {
         "type": "object",
-        "required": ["type", "imports"],
+        "required": ["type", "input_imports"],
         "properties": {
           "type": {
             "type": "string",
-            "enum": ["place", "stat_var", "entity", "linked_edges", "provenance_summary", "stat_var_groups"]
-          },
-          "disabled": {
-            "type": "boolean",
-            "default": false
-          },
-          "stage": {
-            "type": "integer",
-            "minimum": 1,
-            "default": 1
+            "enum": [
+              "PLACE_AGGREGATION",
+              "STAT_VAR_AGGREGATION",
+              "ENTITY_AGGREGATION",
+              "STAT_VAR_SERIES_AGGREGATION",
+              "STAT_VAR_CALCULATION",
+              "SUPER_ENUM_AGGREGATION"
+            ]
           },
-          "imports": {
+          "input_imports": {
             "type": "array",
             "items": { "type": "string" },
             "minItems": 1
-          }
-        },
-        "dependencies": {
-          "type": {
-            "oneOf": [
-              {
-                "properties": {
-                  "type": { "const": "place" },
-                  "source_type": { "type": "string" },
-                  "destination_type": { "type": "string" },
-                  "allow_multiple_to_places": { "type": "boolean" }
-                },
-                "required": ["source_type", "destination_type"]
-              },
-              {
-                "properties": {
-                  "type": { "const": "stat_var" },
-                  "ancestor_sv_id": { "type": "string" },
-                  "source_sv_ids": {
-                    "type": "array",
-                    "items": { "type": "string" },
-                    "minItems": 1
-                  },
-                  "skip_all_sources_present_check": { "type": "boolean" },
-                  "output_import_name": { "type": "string" }
-                },
-                "required": ["ancestor_sv_id", "source_sv_ids"]
-              },
-              {
-                "properties": {
-                  "type": { "const": "entity" },
-                  "entity_types": {
-                    "type": "array",
-                    "items": { "type": "string" }
-                  },
-                  "location_props": {
-                    "type": "array",
-                    "items": { "type": "string" }
-                  },
-                  "date_prop": { "type": "string" },
-                  "agg_date_formats": {
-                    "type": "array",
-                    "items": { "type": "string" }
-                  },
-                  "constraints": {
-                    "type": "array",
-                    "items": { "type": "string" }
-                  }
-                },
-                "required": ["entity_types", "location_props"]
-              },
-              {
-                "properties": {
-                  "type": { "enum": ["linked_edges", "provenance_summary", "stat_var_groups"] }
-                }
-              }
-            ]
-          }
+          },
+          "output_import": { "type": "string" },
+          "round": { "type": "integer" },
+          "place_aggregation": { "type": "object" },
+          "stat_var_aggregation": { "type": "object" },
+          "entity_aggregation": { "type": "object" },
+          "stat_var_series_aggregation": { "type": "object" },
+          "stat_var_calculation": { "type": "object" }
         }
       }
     }
-  },
-  "required": ["aggregations"],
-  "additionalProperties": false
+  }
 }
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator.py b/pipeline/workflow/ingestion-helper/aggregation/validator.py
index c26427e8f..42d6dc2b1 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator.py
@@ -15,6 +15,7 @@
 """Configuration validator and CLI tool for Data Commons aggregations."""
 
 import argparse
+import glob
 import json
 import logging
 import os
@@ -30,14 +31,14 @@
 
 
 def validate_config(config_file_path: str, schema_file_path: str) -> List[Dict[str, Any]]:
-    """Loads and validates the aggregation YAML configuration against the JSON Schema.
+    """Loads and validates an aggregation YAML configuration file against the JSON Schema.
 
     Args:
-        config_file_path: Path to the aggregation.yaml configuration file.
-        schema_file_path: Path to the aggregation_schema.json validation file.
+        config_file_path: Path to a .yaml configuration file.
+        schema_file_path: Path to the JSON Schema validation file.
 
     Returns:
-        A list of validated aggregation dictionaries.
+        A list of validated calculation dictionaries.
 
     Raises:
         FileNotFoundError: If either the config or schema file is missing.
@@ -75,24 +76,25 @@ def validate_config(config_file_path: str, schema_file_path: str) -> List[Dict[s
         logging.error(f"Schema validation failed for config {config_file_path}: {e.message}")
         raise e
 
-    return config["aggregations"]
+    return config.get("calculations", [])
 
 
 def main():
     """CLI entry point for standalone configuration validation."""
     logging.basicConfig(level=logging.INFO)
-    parser = argparse.ArgumentParser(description="Validate Data Commons aggregation configuration files against the JSON Schema.")
-    
-    # Resolve default paths relative to this script's directory (aggregation/)
+    parser = argparse.ArgumentParser(
+        description="Validate Data Commons aggregation configuration files against the JSON Schema."
+    )
+
     curr_dir = os.path.dirname(os.path.abspath(__file__))
-    default_config = os.path.join(curr_dir, "..", "aggregation.yaml")
+    default_config = os.path.join(curr_dir, "configs")
     default_schema = os.path.join(curr_dir, "schema.json")
 
     parser.add_argument(
         "--config",
         type=str,
         default=default_config,
-        help=f"Path to the aggregation YAML config file (default: {default_config})"
+        help=f"Path to an aggregation YAML config file or directory (default: {default_config})"
     )
     parser.add_argument(
         "--schema",
@@ -103,27 +105,48 @@ def main():
 
     args = parser.parse_args()
 
-    print(f"Validating '{os.path.basename(args.config)}' against '{os.path.basename(args.schema)}'...")
-
-    try:
-        aggregations = validate_config(args.config, args.schema)
-        print(f"{GREEN}[SUCCESS] Configuration is valid!{RESET}")
-        print(f"Parsed {len(aggregations)} aggregation steps successfully.")
-        sys.exit(0)
-    except FileNotFoundError as e:
-        print(f"{RED}[ERROR] File not found: {e}{RESET}", file=sys.stderr)
-        sys.exit(1)
-    except jsonschema.exceptions.ValidationError as e:
-        print(f"{RED}[ERROR] Schema Validation Failed:{RESET}", file=sys.stderr)
-        print(f"{RED}  - Path: {'.'.join(str(p) for p in e.path)}{RESET}", file=sys.stderr)
-        print(f"{RED}  - Message: {e.message}{RESET}", file=sys.stderr)
-        sys.exit(1)
-    except yaml.YAMLError as e:
-        print(f"{RED}[ERROR] YAML Syntax Error: {e}{RESET}", file=sys.stderr)
-        sys.exit(1)
-    except Exception as e:
-        print(f"{RED}[ERROR] Unexpected validation failure: {e}{RESET}", file=sys.stderr)
+    # Collect target config files
+    if os.path.isdir(args.config):
+        yaml_files = sorted(
+            glob.glob(os.path.join(args.config, "*.yaml")) + glob.glob(os.path.join(args.config, "*.yml"))
+        )
+        if not yaml_files:
+            print(f"{RED}[ERROR] No YAML config files found in directory: {args.config}{RESET}", file=sys.stderr)
+            sys.exit(1)
+        print(f"Validating {len(yaml_files)} configuration file(s) in '{args.config}' against '{os.path.basename(args.schema)}'...")
+    else:
+        yaml_files = [args.config]
+        print(f"Validating '{os.path.basename(args.config)}' against '{os.path.basename(args.schema)}'...")
+
+    total_calculations = 0
+    has_error = False
+
+    for file_path in yaml_files:
+        try:
+            calculations = validate_config(file_path, args.schema)
+            total_calculations += len(calculations)
+            print(f"  {GREEN}✓{RESET} {os.path.basename(file_path)} ({len(calculations)} calculation steps)")
+        except FileNotFoundError as e:
+            print(f"  {RED}✗ {os.path.basename(file_path)} - File not found: {e}{RESET}", file=sys.stderr)
+            has_error = True
+        except jsonschema.exceptions.ValidationError as e:
+            print(f"  {RED}✗ {os.path.basename(file_path)} - Schema Validation Failed:{RESET}", file=sys.stderr)
+            print(f"    {RED}- Path: {'.'.join(str(p) for p in e.path)}{RESET}", file=sys.stderr)
+            print(f"    {RED}- Message: {e.message}{RESET}", file=sys.stderr)
+            has_error = True
+        except yaml.YAMLError as e:
+            print(f"  {RED}✗ {os.path.basename(file_path)} - YAML Syntax Error: {e}{RESET}", file=sys.stderr)
+            has_error = True
+        except Exception as e:
+            print(f"  {RED}✗ {os.path.basename(file_path)} - Validation failure: {e}{RESET}", file=sys.stderr)
+            has_error = True
+
+    if has_error:
+        print(f"\n{RED}[FAILURE] Validation failed for one or more configuration files.{RESET}", file=sys.stderr)
         sys.exit(1)
+    else:
+        print(f"\n{GREEN}[SUCCESS] All {len(yaml_files)} configuration file(s) passed validation! ({total_calculations} calculation steps total){RESET}")
+        sys.exit(0)
 
 
 if __name__ == "__main__":

From d6012a76d42e8a2b02ff51e11a55baf0226f3869 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 16:02:50 +0530
Subject: [PATCH 20/33] feat(aggregation): expand schema.json with
 type-specific calculation validation rules

---
 .../aggregation/configs/statvar.yaml          | 118 ++++++++++++++++++
 .../ingestion-helper/aggregation/schema.json  | 115 ++++++++++++++++-
 2 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
index 6f2896c6e..c25e7fae2 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -120,81 +120,145 @@ calculations:
         - ancestor_sv_id: Count_Person_NoPrivateHealthInsurance
           source_sv_ids:
             # Population: 26 - 34 Years, Female, No Private Health Insurance
+            - dc/0kyv91d5902mg
             # Population: 75 Years or More, Male, No Private Health Insurance
+            - dc/1ld59l0sp7z85
             # Population: 6 Years or Less, Male, No Private Health Insurance
+            - dc/27q8fsx1rm8e1
             # Population: 55 - 64 Years, Female, No Private Health Insurance
+            - dc/2v3y7wgqrzm5f
             # Population: 65 - 74 Years, Female, No Private Health Insurance
+            - dc/388xq8h8hdhyh
             # Population: 18 - 24 Years, Female, No Private Health Insurance
+            - dc/40rjc7178vkrd
             # Population: 25 - 34 Years, Male, No Private Health Insurance
+            - dc/4qk2e1c4zhsr8
             # Population: 45 - 54 Years, Female, No Private Health Insurance
+            - dc/60mq1lysnc39g
             # Population: 35 - 44 Years, Female, No Private Health Insurance
+            - dc/9cwrwgd2t1r95
             # Population: 19 - 25 Years, Male, No Private Health Insurance
+            - dc/9xxzewchgl6bd
             # Population: 6 - 17 Years, Female, No Private Health Insurance
+            - dc/b9s112lccg17g
             # Population: 26 - 34 Years, Male, No Private Health Insurance
+            - dc/dsmsdwbj1yqf9
             # Population: 45 - 54 Years, Male, No Private Health Insurance
+            - dc/hycewjthgy8w
             # Population: 6 Years or Less, Female, No Private Health Insurance
+            - dc/n0e5f7zjsbedg
             # Population: 6 - 18 Years, Male, No Private Health Insurance
+            - dc/njeqykf73dng2
             # Population: 19 - 25 Years, Female, No Private Health Insurance
+            - dc/nl2wdjffxpdx3
             # Population: 25 - 34 Years, Female, No Private Health Insurance
+            - dc/s11tttbvg3dzh
             # Population: 55 - 64 Years, Male, No Private Health Insurance
+            - dc/v6d0158t101s7
             # Population: 65 - 74 Years, Male, No Private Health Insurance
+            - dc/v80klfl3e9m7f
             # Population: 6 - 18 Years, Female, No Private Health Insurance
+            - dc/w77cv2c32fck7
             # Population: 18 - 24 Years, Male, No Private Health Insurance
+            - dc/wch97nj65s85f
             # Population: 75 Years or More, Female, No Private Health Insurance
+            - dc/wrz9m375r1yy
             # Population: 35 - 44 Years, Male, No Private Health Insurance
+            - dc/wyfvejhn9fe7
             # Population: 6 - 17 Years, Male, No Private Health Insurance
+            - dc/yfmzp444fj5r5
       # dc/g/Person_HealthInsurance-NoPublicHealthInsurance
       # Level 3
         - ancestor_sv_id: Count_Person_NoPublicHealthInsurance
           source_sv_ids:
             # Population: 26 - 34 Years, Female, No Public Health Insurance
+            - dc/0l10mjl6jcb43
             # Population: 65 - 74 Years, Male, No Public Health Insurance
+            - dc/34kcegr057z16
             # Population: 19 - 25 Years, Female, No Public Health Insurance
+            - dc/3l656p2tqt8m9
             # Population: 45 - 54 Years, Female, No Public Health Insurance
+            - dc/81xpj06sdg2kd
             # Population: 26 - 34 Years, Male, No Public Health Insurance
+            - dc/cpcn8j7mw3tx3
             # Population: 35 - 44 Years, Male, No Public Health Insurance
+            - dc/e922mrcqvemm6
             # Population: 19 - 25 Years, Male, No Public Health Insurance
+            - dc/edh0y8vjc9y55
             # Population: 6 - 18 Years, Male, No Public Health Insurance
+            - dc/efg2r5lbj2rs
             # Population: 65 - 74 Years, Female, No Public Health Insurance
+            - dc/fz5dn8yl6m516
             # Population: 45 - 54 Years, Male, No Public Health Insurance
+            - dc/g9qp5cejpyhq5
             # Population: 75 Years or More, Male, No Public Health Insurance
+            - dc/gc7lcsrrv1rm5
             # Population: 6 - 18 Years, Female, No Public Health Insurance
+            - dc/grj8xq0mmr602
             # Population: 35 - 44 Years, Female, No Public Health Insurance
+            - dc/h8123v7kk1xr
             # Population: 55 - 64 Years, Male, No Public Health Insurance
+            - dc/jrh189de6x30h
             # Population: 6 - 17 Years, Male, No Public Health Insurance
+            - dc/ljcepzzm38m7h
             # Population: 18 - 24 Years, Female, No Public Health Insurance
+            - dc/mhfv62j3s22rc
             # Population: 18 - 24 Years, Male, No Public Health Insurance
+            - dc/p8pjrf6ffdeb
             # Population: 75 Years or More, Female, No Public Health Insurance
+            - dc/qtmer196cydk
             # Population: 6 Years or Less, Female, No Public Health Insurance
+            - dc/r4yy8tmxk2pb
             # Population: 25 - 34 Years, Male, No Public Health Insurance
+            - dc/t8p05s93nvjq1
             # Population: 6 - 17 Years, Female, No Public Health Insurance
+            - dc/v1ez3g1cnv2b2
             # Population: 6 Years or Less, Male, No Public Health Insurance
+            - dc/v7xhmvehm80ph
             # Population: 55 - 64 Years, Female, No Public Health Insurance
+            - dc/vxecjxxbmhy43
             # Population: 25 - 34 Years, Female, No Public Health Insurance
+            - dc/yjxmx3n02dx08
       # dc/g/Person_HealthInsurance-WithOneTypeOfHealthInsurance
       # Level 1
         - ancestor_sv_id: Count_Person_WithOneTypeOfHealthInsurance
           source_sv_ids:
             # Population: 18 Years or Less, With One Type of Health Insurance
+            - dc/3jbqx1kf03nd4
             # Population: 18 - 34 Years, With One Type of Health Insurance
+            - dc/3s8t9m1k1pgw5
             # Population: 35 - 64 Years, With One Type of Health Insurance
+            - dc/hcvy4028mk048
             # Population: 19 Years or Less, With One Type of Health Insurance
+            - dc/sjvp50nwdlpmf
             # Population: 19 - 34 Years, With One Type of Health Insurance
+            - dc/thcbepzex9zd9
             # Population: 65 Years or More, With One Type of Health Insurance
+            - dc/v1w52jrtvw6m2
       # dc/g/Person_HealthInsurance-WithPublicCoverage
       # Level 3
         - ancestor_sv_id: Count_Person_WithPublicCoverage
           source_sv_ids:
             # Population: 18 Years or Less, With Disability, With Public Coverage
+            - dc/0f66x8keewypb
             # Population: 19 Years or Less, With Disability, With Public Coverage
+            - dc/1s4eg9f8m6328
             # Population: 19 Years or Less, No Disability, With Public Coverage
+            - dc/35b9fkeh2qlf
             # Population: 65 Years or More, No Disability, With Public Coverage
+            - dc/7cj9xhqe5262d
             # Population: 19 - 64 Years, No Disability, With Public Coverage
+            - dc/84cnddqz2egl6
             # Population: 65 Years or More, With Disability, With Public Coverage
+            - dc/nqtzsgv2g5h09
             # Population: 18 - 64 Years, With Disability, With Public Coverage
+            - dc/qekes8xvrvz18
             # Population: 18 Years or Less, No Disability, With Public Coverage
+            - dc/r7975mdk9v7kh
             # Population: 18 - 64 Years, No Disability, With Public Coverage
+            - dc/t41xetm1g0q6h
             # Population: 19 - 64 Years, With Disability, With Public Coverage
+            - dc/zkcy0f52ewh04
 
       #
       # Ability to speak English
@@ -326,83 +390,137 @@ calculations:
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_5ThAnd6ThGrade
           source_sv_ids:
             # Population: 5th And 6th Grade, Female
+            - Count_Person_25OrMoreYears_EducationalAttainment5ThAnd6ThGrade_Female
             # Population: 5th And 6th Grade, Male
+            - Count_Person_25OrMoreYears_EducationalAttainment5ThAnd6ThGrade_Male
       # dc/g/Person_EducationalAttainment-7ThAnd8ThGrade
       # Level 1
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_7ThAnd8ThGrade
           source_sv_ids:
             # Population: 7th And 8th Grade, Female
+            - Count_Person_25OrMoreYears_EducationalAttainment7ThAnd8ThGrade_Female
             # Population: 7th And 8th Grade, Male
+            - Count_Person_25OrMoreYears_EducationalAttainment7ThAnd8ThGrade_Male
       # dc/g/Person_EducationalAttainment-9ThTo12ThGradeNoDiploma
       # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_9ThTo12ThGradeNoDiploma
           source_sv_ids:
             # Population: 18 - 24 Years, 9th To 12th Grade No Diploma, Female
+            - dc/g0y4zefyr10n7
             # Population: 18 - 24 Years, 9th To 12th Grade No Diploma, Male
+            - dc/0kptwgt2r2j97
             # Population: 25 - 34 Years, 9th To 12th Grade No Diploma, Female
+            - dc/4f3r13e4gkhq6
             # Population: 25 - 34 Years, 9th To 12th Grade No Diploma, Male
+            - dc/tcv30t9ydkldc
             # Population: 35 - 44 Years, 9th To 12th Grade No Diploma, Female
+            - dc/7k5lylyz4l2th
             # Population: 35 - 44 Years, 9th To 12th Grade No Diploma, Male
+            - dc/nrmj89cggsfg6
             # Population: 45 - 64 Years, 9th To 12th Grade No Diploma, Female
+            - dc/m3g8esl121v73
             # Population: 45 - 64 Years, 9th To 12th Grade No Diploma, Male
+            - dc/3hfl00nblgvg5
             # Population: 65 Years or More, 9th To 12th Grade No Diploma, Female
+            - dc/5sps7rmylm73b
             # Population: 65 Years or More, 9th To 12th Grade No Diploma, Male
+            - dc/07hctc6f9e2k9
       # dc/g/Person_EducationalAttainment-LessThan9ThGrade
       # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_LessThan9ThGrade
           source_sv_ids:
             # Population: 18 - 24 Years, Less Than 9th Grade, Female
+            - dc/935ftchsz31b1
             # Population: 18 - 24 Years, Less Than 9th Grade, Male
+            - dc/n6v6b8vh8jnd5
             # Population: 25 - 34 Years, Less Than 9th Grade, Female
+            - dc/dr8wcfnpxyrj7
             # Population: 25 - 34 Years, Less Than 9th Grade, Male
+            - dc/k8g20v452g617
             # Population: 35 - 44 Years, Less Than 9th Grade, Female
+            - dc/y051838618t9c
             # Population: 35 - 44 Years, Less Than 9th Grade, Male
+            - dc/cpx5ll8vth2n7
             # Population: 45 - 64 Years, Less Than 9th Grade, Female
+            - dc/b841qxd3smpt
             # Population: 45 - 64 Years, Less Than 9th Grade, Male
+            - dc/5vhc0r8m4x5g9
             # Population: 65 Years or More, Less Than 9th Grade, Female
+            - dc/292723k92k5tb
             # Population: 65 Years or More, Less Than 9th Grade, Male
+            - dc/68pblb53csteb
       # dc/g/Person_EducationalAttainment-LessThanHighSchoolDiploma
       # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_LessThanHighSchoolDiploma
           source_sv_ids:
             # Population: Less Than High School Diploma, Male, Two or More Races
+            - dc/3g5g2k9n19l45
             # Population: Less Than High School Diploma, Male, Asian Alone
+            - dc/z3z44q7edbyp7
             # Population: Less Than High School Diploma, Female, Hispanic or Latino
+            - dc/0p89e7q72edr1
             # Population: Less Than High School Diploma, Male, White Alone
+            - dc/70kcr1l5jldsb
             # Population: Less Than High School Diploma, Female, Asian Alone
+            - dc/nbfp13v6t9v87
             # Population: Less Than High School Diploma, Male, Hispanic or Latino
+            - dc/szpdtvh1v6p49
             # Population: Less Than High School Diploma, Male, Some Other Race Alone
+            - dc/pyflt85w79j35
             # Population: Less Than High School Diploma, Female, White Alone Not Hispanic or Latino
+            - dc/vhxsz5bplmef4
             # Population: Less Than High School Diploma, Male, Black or African American Alone
+            - dc/k51n39gvd22t5
             # Population: Less Than High School Diploma, Female, Black or African American Alone
+            - dc/m1trmbvh8sygd
             # Population: Less Than High School Diploma, Male, Native Hawaiian or Other Pacific Islander Alone
+            - dc/7n0468s56spt8
             # Population: Less Than High School Diploma, Male, American Indian or Alaska Native Alone
+            - dc/5e4qydg4slyvd
             # Population: Less Than High School Diploma, Female, Native Hawaiian or Other Pacific Islander Alone
+            - dc/y95cbfgtlqrmb
             # Population: Less Than High School Diploma, Female, Some Other Race Alone
+            - dc/p7978m8tbsns6
             # Population: Less Than High School Diploma, Female, American Indian or Alaska Native Alone
+            - dc/lyzcfc37eet64
             # Population: Less Than High School Diploma, Female, White Alone
+            - dc/m4q40nln3qms
             # Population: Less Than High School Diploma, Male, White Alone Not Hispanic or Latino
+            - dc/yfrrvevrmyr74
             # Population: Less Than High School Diploma, Female, Two or More Races
+            - dc/29l3m1z7d3n7c
       # dc/g/Person_EducationalAttainment-NurseryTo4ThGrade
       # Level 1
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_NurseryTo4ThGrade
           source_sv_ids:
             # Population: Nursery To 4th Grade, Female
+            - Count_Person_25OrMoreYears_EducationalAttainmentNurseryTo4ThGrade_Female
             # Population: Nursery To 4th Grade, Male
+            - Count_Person_25OrMoreYears_EducationalAttainmentNurseryTo4ThGrade_Male
       # dc/g/Person_EducationalAttainment-SomeCollegeNoDegree
       # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_SomeCollegeNoDegree
           source_sv_ids:
             # Population: 18 - 24 Years, Some College No Degree, Female
+            - dc/x8sydp80d61v9
             # Population: 18 - 24 Years, Some College No Degree, Male
+            - dc/bxs5x91p5jbf
             # Population: 25 - 34 Years, Some College No Degree, Female
+            - dc/z8j7q9303cpx8
             # Population: 25 - 34 Years, Some College No Degree, Male
+            - dc/d4p0m2p51v7d3
             # Population: 35 - 44 Years, Some College No Degree, Female
+            - dc/66nscflhly519
             # Population: 35 - 44 Years, Some College No Degree, Male
+            - dc/q4eclxrhd5mrd
             # Population: 45 - 64 Years, Some College No Degree, Female
+            - dc/dsmks2epl3ve
             # Population: 45 - 64 Years, Some College No Degree, Male
+            - dc/6ms49l0q9tq44
             # Population: 65 Years or More, Some College No Degree, Female
+            - dc/c0vyt03x0h0v4
             # Population: 65 Years or More, Some College No Degree, Male
+            - dc/5s08n6kdt26db
         - ancestor_sv_id: Count_Person_EducationalAttainment_1StTo12ThGrade
           source_sv_ids:
             - Count_Person_EducationalAttainment1StGrade
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index 57805f320..f6cad1f71 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -34,7 +34,120 @@
           "entity_aggregation": { "type": "object" },
           "stat_var_series_aggregation": { "type": "object" },
           "stat_var_calculation": { "type": "object" }
-        }
+        },
+        "allOf": [
+          {
+            "if": { "properties": { "type": { "const": "PLACE_AGGREGATION" } }, "required": ["type"] },
+            "then": {
+              "required": ["place_aggregation"],
+              "properties": {
+                "place_aggregation": {
+                  "type": "object",
+                  "required": ["from_place_types", "to_place_types"],
+                  "properties": {
+                    "from_place_types": { "type": "string" },
+                    "to_place_types": { "type": "string" },
+                    "allow_multiple_to_places": { "type": "boolean" }
+                  }
+                }
+              }
+            }
+          },
+          {
+            "if": { "properties": { "type": { "const": "STAT_VAR_AGGREGATION" } }, "required": ["type"] },
+            "then": {
+              "required": ["stat_var_aggregation"],
+              "properties": {
+                "stat_var_aggregation": {
+                  "type": "object",
+                  "required": ["aggregations"],
+                  "properties": {
+                    "aggregations": {
+                      "type": "array",
+                      "items": {
+                        "type": "object",
+                        "required": ["ancestor_sv_id", "source_sv_ids"],
+                        "properties": {
+                          "ancestor_sv_id": { "type": "string" },
+                          "source_sv_ids": {
+                            "type": "array",
+                            "items": { "type": "string" },
+                            "minItems": 1
+                          },
+                          "skip_all_sources_present_check": { "type": "boolean" }
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          },
+          {
+            "if": { "properties": { "type": { "const": "ENTITY_AGGREGATION" } }, "required": ["type"] },
+            "then": {
+              "required": ["entity_aggregation"],
+              "properties": {
+                "entity_aggregation": {
+                  "type": "object",
+                  "required": ["entity_types", "location_props", "date_prop", "agg_date_formats"],
+                  "properties": {
+                    "entity_types": {
+                      "type": "array",
+                      "items": { "type": "string" },
+                      "minItems": 1
+                    },
+                    "location_props": {
+                      "type": "array",
+                      "items": { "type": "string" },
+                      "minItems": 1
+                    },
+                    "date_prop": { "type": "string" },
+                    "agg_date_formats": {
+                      "type": "array",
+                      "items": { "type": "string" },
+                      "minItems": 1
+                    },
+                    "constraints": {
+                      "type": "array",
+                      "items": { "type": "string" }
+                    }
+                  }
+                }
+              }
+            }
+          },
+          {
+            "if": { "properties": { "type": { "const": "STAT_VAR_SERIES_AGGREGATION" } }, "required": ["type"] },
+            "then": {
+              "required": ["stat_var_series_aggregation"],
+              "properties": {
+                "stat_var_series_aggregation": {
+                  "type": "object",
+                  "required": ["aggr_funcs"],
+                  "properties": {
+                    "aggr_funcs": { "type": "array" }
+                  }
+                }
+              }
+            }
+          },
+          {
+            "if": { "properties": { "type": { "const": "STAT_VAR_CALCULATION" } }, "required": ["type"] },
+            "then": {
+              "required": ["stat_var_calculation"],
+              "properties": {
+                "stat_var_calculation": {
+                  "type": "object",
+                  "required": ["calculations"],
+                  "properties": {
+                    "calculations": { "type": "array" }
+                  }
+                }
+              }
+            }
+          }
+        ]
       }
     }
   }

From d4f6690b709648cb78e6b4489039de9862c4836b Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 16:05:27 +0530
Subject: [PATCH 21/33] test(aggregation): update validator_test.py for
 calculations schema

---
 .../aggregation/validator_test.py             | 279 +++++++++---------
 1 file changed, 140 insertions(+), 139 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
index 617e43a97..35f94ef04 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -33,59 +33,86 @@ class TestValidatorSuccess(unittest.TestCase):
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
         self.tmpdir = tempfile.TemporaryDirectory()
-        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        self.config_path = os.path.join(self.tmpdir.name, "config.yaml")
 
     def tearDown(self):
         self.tmpdir.cleanup()
 
     def test_validate_config_success_all_types(self):
-        """Verifies that a comprehensive, valid config with all types passes validation."""
+        """Verifies that a comprehensive, valid config with all 6 calculation types passes validation."""
         valid_all_types_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-                imports: ["*"]
-                stage: 1
-                disabled: false
-
-              - type: place
-                source_type: County
-                destination_type: State
-                allow_multiple_to_places: true
-                imports: ["ImportA", "ImportB"]
-                stage: 2
-
-              - type: stat_var
-                ancestor_sv_id: Count_Person
-                source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
-                skip_all_sources_present_check: true
-                output_import_name: "Aggregated_Pop"
-                imports: ["ImportC"]
-                stage: 3
-
-              - type: entity
-                entity_types: ["MortalityEvent"]
-                location_props: ["location"]
-                date_prop: "date"
-                agg_date_formats: ["%Y"]
-                imports: ["ImportD"]
-
-              - type: provenance_summary
-                imports: ["*"]
-
-              - type: stat_var_groups
-                imports: ["*"]
+            calculations:
+              - type: PLACE_AGGREGATION
+                input_imports:
+                  - CensusACS5YearSurvey
+                output_import: CensusACS5YearSurvey_AggCountry
+                round: 1
+                place_aggregation:
+                  from_place_types: State
+                  to_place_types: Country
+
+              - type: STAT_VAR_AGGREGATION
+                output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
+                input_imports:
+                  - CensusACS5YearSurvey
+                stat_var_aggregation:
+                  aggregations:
+                    - ancestor_sv_id: Count_Person_NoHealthInsurance
+                      source_sv_ids:
+                        - dc/y0dvhk0sggzef
+
+              - type: ENTITY_AGGREGATION
+                output_import: FireFAMWEB_Agg
+                input_imports:
+                  - FireFAMWEB
+                entity_aggregation:
+                  entity_types:
+                    - BurnedArea
+                  location_props:
+                    - location
+                  date_prop: startDate
+                  agg_date_formats:
+                    - "%Y"
+
+              - type: STAT_VAR_SERIES_AGGREGATION
+                input_imports:
+                  - NASA_NEXDCP30
+                round: 1
+                output_import: NASA_NEXDCP30_AggrDiffStats
+                stat_var_series_aggregation:
+                  aggr_funcs:
+                    - max_diff_across_measurement_methods: {}
+
+              - type: STAT_VAR_CALCULATION
+                input_imports:
+                  - EIA_Electricity
+                output_import: Energy_StatVarCalculation
+                stat_var_calculation:
+                  calculations:
+                    - input1:
+                        sv_regex: Annual_Emissions
+                      operation: DIVIDE
+                      output:
+                        sv: Annual_Emissions_Per_Capita
+
+              - type: SUPER_ENUM_AGGREGATION
+                input_imports:
+                  - CensusACS5YearSurvey
+                output_import: CensusACS5YearSurvey_SuperEnum
         """)
 
         with open(self.config_path, "w") as f:
             f.write(valid_all_types_yaml)
 
-        aggregations = validate_config(self.config_path, self.schema_path)
-        
-        self.assertEqual(len(aggregations), 6)
-        self.assertEqual(aggregations[0]["type"], "linked_edges")
-        self.assertEqual(aggregations[1]["source_type"], "County")
-        self.assertEqual(aggregations[2]["ancestor_sv_id"], "Count_Person")
-        self.assertEqual(aggregations[3]["entity_types"], ["MortalityEvent"])
+        calculations = validate_config(self.config_path, self.schema_path)
+
+        self.assertEqual(len(calculations), 6)
+        self.assertEqual(calculations[0]["type"], "PLACE_AGGREGATION")
+        self.assertEqual(calculations[1]["type"], "STAT_VAR_AGGREGATION")
+        self.assertEqual(calculations[2]["type"], "ENTITY_AGGREGATION")
+        self.assertEqual(calculations[3]["type"], "STAT_VAR_SERIES_AGGREGATION")
+        self.assertEqual(calculations[4]["type"], "STAT_VAR_CALCULATION")
+        self.assertEqual(calculations[5]["type"], "SUPER_ENUM_AGGREGATION")
 
 
 class TestValidatorSchemaConstraints(unittest.TestCase):
@@ -94,7 +121,7 @@ class TestValidatorSchemaConstraints(unittest.TestCase):
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
         self.tmpdir = tempfile.TemporaryDirectory()
-        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        self.config_path = os.path.join(self.tmpdir.name, "config.yaml")
 
     def tearDown(self):
         self.tmpdir.cleanup()
@@ -102,35 +129,36 @@ def tearDown(self):
     def test_validate_config_missing_type(self):
         """Verifies that missing the required 'type' field raises ValidationError."""
         invalid_missing_type_yaml = textwrap.dedent("""\
-            aggregations:
-              - imports: ["*"]
+            calculations:
+              - input_imports:
+                  - ImportA
         """)
         with open(self.config_path, "w") as f:
             f.write(invalid_missing_type_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("'type' is a required property", ctx.exception.message)
+        self.assertEqual(ctx.exception.validator, "required")
 
-    def test_validate_config_missing_imports(self):
-        """Verifies that missing the required 'imports' field raises ValidationError."""
+    def test_validate_config_missing_input_imports(self):
+        """Verifies that missing the required 'input_imports' field raises ValidationError."""
         invalid_missing_imports_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
+            calculations:
+              - type: SUPER_ENUM_AGGREGATION
         """)
         with open(self.config_path, "w") as f:
             f.write(invalid_missing_imports_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("'imports' is a required property", ctx.exception.message)
+        self.assertIn("'input_imports' is a required property", ctx.exception.message)
 
-    def test_validate_config_invalid_imports_type(self):
-        """Verifies that imports field being a string instead of an array raises ValidationError."""
+    def test_validate_config_invalid_input_imports_type(self):
+        """Verifies that input_imports field being a string instead of an array raises ValidationError."""
         invalid_imports_type_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-                imports: "*"
+            calculations:
+              - type: SUPER_ENUM_AGGREGATION
+                input_imports: "SingleImportString"
         """)
         with open(self.config_path, "w") as f:
             f.write(invalid_imports_type_yaml)
@@ -139,42 +167,12 @@ def test_validate_config_invalid_imports_type(self):
             validate_config(self.config_path, self.schema_path)
         self.assertIn("is not of type 'array'", ctx.exception.message)
 
-    def test_validate_config_invalid_stage_type(self):
-        """Verifies that stage field being a string instead of an integer raises ValidationError."""
-        invalid_stage_type_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-                imports: ["*"]
-                stage: "first"
-        """)
-        with open(self.config_path, "w") as f:
-            f.write(invalid_stage_type_yaml)
-
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config(self.config_path, self.schema_path)
-        self.assertIn("is not of type 'integer'", ctx.exception.message)
-
-    def test_validate_config_invalid_stage_value(self):
-        """Verifies that a stage value of 0 (minimum is 1) raises ValidationError."""
-        invalid_stage_value_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-                imports: ["*"]
-                stage: 0
-        """)
-        with open(self.config_path, "w") as f:
-            f.write(invalid_stage_value_yaml)
-
-        with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
-            validate_config(self.config_path, self.schema_path)
-        self.assertIn("is less than the minimum of 1", ctx.exception.message)
-
-    def test_validate_config_empty_imports_list(self):
-        """Verifies that an empty imports list raises ValidationError."""
+    def test_validate_config_empty_input_imports_list(self):
+        """Verifies that an empty input_imports list raises ValidationError."""
         invalid_empty_imports_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-                imports: []
+            calculations:
+              - type: SUPER_ENUM_AGGREGATION
+                input_imports: []
         """)
         with open(self.config_path, "w") as f:
             f.write(invalid_empty_imports_yaml)
@@ -183,17 +181,17 @@ def test_validate_config_empty_imports_list(self):
             validate_config(self.config_path, self.schema_path)
         self.assertIn("should be non-empty", ctx.exception.message)
 
-    def test_validate_config_missing_aggregations_key(self):
-        """Verifies that missing the required 'aggregations' root key raises ValidationError."""
-        missing_aggregations_yaml = textwrap.dedent("""\
+    def test_validate_config_missing_calculations_key(self):
+        """Verifies that missing the required 'calculations' root key raises ValidationError."""
+        missing_calculations_yaml = textwrap.dedent("""\
             some_other_key: []
         """)
         with open(self.config_path, "w") as f:
-            f.write(missing_aggregations_yaml)
+            f.write(missing_calculations_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("'aggregations' is a required property", ctx.exception.message)
+        self.assertIn("'calculations' is a required property", ctx.exception.message)
 
     def test_validate_config_empty_file(self):
         """Verifies that a completely empty configuration file raises ValidationError."""
@@ -203,58 +201,61 @@ def test_validate_config_empty_file(self):
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("'aggregations' is a required property", ctx.exception.message)
+        self.assertIn("'calculations' is a required property", ctx.exception.message)
 
 
 class TestValidatorConditionalDependencies(unittest.TestCase):
-    """Verifies type-specific conditional dependencies (OneOf / dependencies)."""
+    """Verifies type-specific conditional sub-block dependencies."""
 
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
         self.tmpdir = tempfile.TemporaryDirectory()
-        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        self.config_path = os.path.join(self.tmpdir.name, "config.yaml")
 
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_validate_config_place_missing_field(self):
-        """Verifies that a place step missing the required 'source_type' raises ValidationError."""
-        invalid_place_missing_field_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: place
-                destination_type: State
-                imports: ["*"]
+    def test_validate_config_place_missing_subblock(self):
+        """Verifies that a PLACE_AGGREGATION step missing 'place_aggregation' raises ValidationError."""
+        invalid_place_missing_yaml = textwrap.dedent("""\
+            calculations:
+              - type: PLACE_AGGREGATION
+                input_imports:
+                  - ImportA
         """)
         with open(self.config_path, "w") as f:
-            f.write(invalid_place_missing_field_yaml)
+            f.write(invalid_place_missing_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
-
-    def test_validate_config_stat_var_missing_field(self):
-        """Verifies that a stat_var step missing the required 'source_sv_ids' raises ValidationError."""
-        invalid_stat_var_missing_field_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: stat_var
-                ancestor_sv_id: Count_Person
-                imports: ["*"]
+        self.assertIn("'place_aggregation' is a required property", ctx.exception.message)
+
+    def test_validate_config_stat_var_missing_subblock(self):
+        """Verifies that a STAT_VAR_AGGREGATION step missing 'stat_var_aggregation' raises ValidationError."""
+        invalid_stat_var_missing_yaml = textwrap.dedent("""\
+            calculations:
+              - type: STAT_VAR_AGGREGATION
+                input_imports:
+                  - ImportA
         """)
         with open(self.config_path, "w") as f:
-            f.write(invalid_stat_var_missing_field_yaml)
+            f.write(invalid_stat_var_missing_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+        self.assertIn("'stat_var_aggregation' is a required property", ctx.exception.message)
 
     def test_validate_config_stat_var_empty_source_svs(self):
-        """Verifies that a stat_var step with an empty source_sv_ids array raises ValidationError."""
+        """Verifies that an aggregation item with an empty source_sv_ids array raises ValidationError."""
         invalid_stat_var_empty_svs_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: stat_var
-                ancestor_sv_id: Count_Person
-                source_sv_ids: []
-                imports: ["*"]
+            calculations:
+              - type: STAT_VAR_AGGREGATION
+                input_imports:
+                  - ImportA
+                stat_var_aggregation:
+                  aggregations:
+                    - ancestor_sv_id: Count_Person
+                      source_sv_ids: []
         """)
         with open(self.config_path, "w") as f:
             f.write(invalid_stat_var_empty_svs_yaml)
@@ -263,20 +264,20 @@ def test_validate_config_stat_var_empty_source_svs(self):
             validate_config(self.config_path, self.schema_path)
         self.assertIn("should be non-empty", ctx.exception.message)
 
-    def test_validate_config_entity_missing_field(self):
-        """Verifies that an entity step missing the required 'location_props' raises ValidationError."""
-        invalid_entity_missing_field_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: entity
-                entity_types: ["Event"]
-                imports: ["*"]
+    def test_validate_config_entity_missing_subblock(self):
+        """Verifies that an ENTITY_AGGREGATION step missing 'entity_aggregation' raises ValidationError."""
+        invalid_entity_missing_yaml = textwrap.dedent("""\
+            calculations:
+              - type: ENTITY_AGGREGATION
+                input_imports:
+                  - ImportA
         """)
         with open(self.config_path, "w") as f:
-            f.write(invalid_entity_missing_field_yaml)
+            f.write(invalid_entity_missing_yaml)
 
         with self.assertRaises(jsonschema.exceptions.ValidationError) as ctx:
             validate_config(self.config_path, self.schema_path)
-        self.assertIn("is not valid under any of the given schemas", ctx.exception.message)
+        self.assertIn("'entity_aggregation' is a required property", ctx.exception.message)
 
 
 class TestValidatorErrorsAndFileSystem(unittest.TestCase):
@@ -285,7 +286,7 @@ class TestValidatorErrorsAndFileSystem(unittest.TestCase):
     def setUp(self):
         self.schema_path = os.path.join(os.path.dirname(__file__), "schema.json")
         self.tmpdir = tempfile.TemporaryDirectory()
-        self.config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        self.config_path = os.path.join(self.tmpdir.name, "config.yaml")
 
     def tearDown(self):
         self.tmpdir.cleanup()
@@ -293,10 +294,10 @@ def tearDown(self):
     def test_validate_config_yaml_syntax_error(self):
         """Verifies that malformed YAML syntax raises YAMLError."""
         malformed_yaml = textwrap.dedent("""\
-            aggregations:
-              - type: linked_edges
-              imports:
-              - "*"
+            calculations:
+              - type: PLACE_AGGREGATION
+              input_imports:
+              - "ImportA"
         """)
         with open(self.config_path, "w") as f:
             f.write(malformed_yaml)
@@ -313,7 +314,7 @@ def test_validate_config_missing_config_file(self):
     def test_validate_config_missing_schema_file(self):
         """Verifies that a missing schema file path raises FileNotFoundError."""
         with open(self.config_path, "w") as f:
-            f.write("aggregations: []")
+            f.write("calculations: []")
 
         with self.assertRaises(FileNotFoundError) as ctx:
             validate_config(self.config_path, "non_existent_schema.json")

From c1cafeeafe1d6c311d33ceb413fc651a9f1e542c Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 19:35:01 +0530
Subject: [PATCH 22/33] feat(aggregation): update orchestrator to process
 per-import isolated calculations with synchronized round execution

---
 .../aggregation/orchestrator.py               | 335 ++++++++++--------
 .../aggregation/orchestrator_test.py          | 153 +++-----
 2 files changed, 231 insertions(+), 257 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 4ef337aa8..e2c20c575 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -12,31 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Aggregation orchestrator for Data Commons ingestion workflow."""
+
 import logging
 import os
+import time
 from typing import Any, Dict, List, Optional
 
 from .bq_executor import BigQueryExecutor
 from .linked_edge_generator import LinkedEdgeGenerator
+from .place_aggregation_generator import PlaceAggregationGenerator
 from .provenance_summary_generator import ProvenanceSummaryGenerator
 from .stat_var_aggregator import StatVarAggregator
-from .place_aggregation_generator import PlaceAggregationGenerator
 from .stat_var_group_generator import StatVarGroupGenerator
 from .validator import validate_config
 
 
 class AggregationOrchestrator:
-    """Orchestrates the overall aggregation workflow."""
-
-    def __init__(self,
-                 connection_id: str,
-                 project_id: str,
-                 instance_id: str,
-                 database_id: str,
-                 location: Optional[str] = None,
-                 is_base_dc: bool = True,
-                 config_file_path: Optional[str] = None) -> None:
-        """Initializes the orchestrator and loads/validates the configuration.
+    """Orchestrates the overall aggregation workflow across multi-round execution."""
+
+    def __init__(
+        self,
+        connection_id: str,
+        project_id: str,
+        instance_id: str,
+        database_id: str,
+        location: Optional[str] = None,
+        is_base_dc: bool = True,
+        config_dir: Optional[str] = None,
+        config_file_path: Optional[str] = None
+    ) -> None:
+        """Initializes the orchestrator and loads/validates configuration files.
 
         Args:
             connection_id: BigQuery connection ID to Spanner.
@@ -45,175 +51,212 @@ def __init__(self,
             database_id: Spanner Database ID.
             location: BigQuery location.
             is_base_dc: Whether this is running in the base Data Commons environment.
-            config_file_path: Optional custom path to the aggregation.yaml file.
-                If not specified, defaults to the aggregation.yaml in the parent directory.
+            config_dir: Directory containing aggregation YAML configs (default: configs/).
+            config_file_path: Optional path to single config file or directory.
         """
-        self.executor = BigQueryExecutor(connection_id=connection_id,
-                                         project_id=project_id,
-                                         instance_id=instance_id,
-                                         database_id=database_id,
-                                         location=location,
-                                         run_sequential=False)
-
+        self.executor = BigQueryExecutor(
+            connection_id=connection_id,
+            project_id=project_id,
+            instance_id=instance_id,
+            database_id=database_id,
+            location=location,
+            run_sequential=False
+        )
         self.is_base_dc = is_base_dc
 
-        # Resolve paths for default config and schema
+        # Resolve paths for config directory and schema
         curr_dir = os.path.dirname(os.path.abspath(__file__))
-        if not config_file_path:
-            config_file_path = os.path.join(curr_dir, "..", "aggregation.yaml")
+        target_config = config_dir or config_file_path or os.path.join(curr_dir, "configs")
         schema_file_path = os.path.join(curr_dir, "schema.json")
 
         # Load and validate configuration
-        self.aggregations = validate_config(config_file_path, schema_file_path)
+        self.calculations = validate_config(target_config, schema_file_path)
 
-    def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
-        """Executes all enabled aggregations in the specified stage in parallel.
+    def run(self, active_imports: List[str]) -> None:
+        """Executes aggregations independently for each active import.
 
-        Args:
-            stage_num: The stage number to execute.
-            active_imports: The list of active import names in this run.
+        Blocks and synchronizes round progression for each import:
+        Round 1 -> Wait -> Round 2 -> Wait -> Round 3 -> Wait.
 
-        Returns:
-            A list of BigQuery job IDs submitted for this stage.
+        Args:
+            active_imports: List of active import dataset names to process.
         """
-        logging.info(f"Starting Aggregation Orchestration for Stage {stage_num}")
-        logging.info(f"Active imports in this run: {active_imports}")
-        jobs = []
+        logging.info(f"Starting Aggregation Orchestrator run for active imports: {active_imports}")
 
-        for config in self.aggregations:
-            # 1. Skip if disabled
-            if config.get("disabled", False):
-                continue
+        for single_import in active_imports:
+            logging.info(f"=== Starting Aggregation Pipeline for Import: '{single_import}' ===")
+            active_rounds = self.get_active_rounds_for_import(single_import)
 
-            # 2. Filter by stage
-            if config.get("stage", 1) != stage_num:
+            if not active_rounds:
+                logging.info(f"No aggregation steps configured for import '{single_import}'. Skipping.")
                 continue
 
-            # 3. Filter by active imports
-            applicable_imports = self._get_applicable_imports(config, active_imports)
-            if not applicable_imports:
-                continue
+            for round_num in active_rounds:
+                logging.info(f"--- Triggering Round {round_num} for import '{single_import}' ---")
+                self._execute_and_synchronize_round(single_import, round_num)
 
-            # 4. Route to correct generator helper
-            step_type = config["type"]
-            logging.info(f"Triggering step '{step_type}' in Stage {stage_num}...")
-            
-            step_jobs = []
-            if step_type == "place":
-                step_jobs = self._trigger_place(config, applicable_imports)
-            elif step_type == "stat_var":
-                step_jobs = self._trigger_stat_var(config, applicable_imports)
-            elif step_type == "linked_edges":
-                step_jobs = self._trigger_linked_edges(config, applicable_imports)
-            elif step_type == "provenance_summary":
-                step_jobs = self._trigger_provenance_summary(config, applicable_imports)
-            elif step_type == "stat_var_groups":
-                step_jobs = self._trigger_stat_var_groups(config, applicable_imports)
-            else:
-                raise ValueError(f"Unsupported or unimplemented aggregation step type: {step_type}")
+            logging.info(f"=== Successfully completed all aggregation rounds for Import: '{single_import}' ===")
 
-            # Collect BQ jobs
-            for job in step_jobs:
-                if job and job.job_id:
-                    jobs.append(job.job_id)
-
-        logging.info(f"=== Stage {stage_num} initiated successfully. Submitted {len(jobs)} BigQuery jobs: {jobs} ===")
-        return jobs
-
-    def has_stage(self, stage_num: int, active_imports: List[str]) -> bool:
-        """Checks if there are any active, enabled aggregations configured for the stage.
+    def get_active_rounds_for_import(self, single_import: str) -> List[int]:
+        """Returns a sorted list of unique active round numbers for a single import.
 
         Args:
-            stage_num: The stage number to check.
-            active_imports: The list of active import names.
+            single_import: The active import dataset name.
 
         Returns:
-            True if the stage has at least one aggregation that will run, False otherwise.
+            Sorted list of round numbers (e.g., [1, 2, 3]).
         """
-        for config in self.aggregations:
-            if config.get("disabled", False):
-                continue
-            if config.get("stage", 1) != stage_num:
-                continue
-            
-            # Check if it applies to any active imports
-            if self._get_applicable_imports(config, active_imports):
-                return True
-
-        return False
-
-    def get_active_stages(self, active_imports: List[str]) -> List[int]:
-        """Returns a sorted list of unique, active, and enabled stage numbers.
+        rounds = set()
+        for calc in self.calculations:
+            if self._calc_applies_to_import(calc, single_import):
+                rounds.add(calc.get("round", 1))
+        return sorted(list(rounds))
+
+    def get_active_rounds(self, active_imports: List[str]) -> List[int]:
+        """Returns a sorted list of unique active round numbers across active imports."""
+        rounds = set()
+        for single_import in active_imports:
+            rounds.update(self.get_active_rounds_for_import(single_import))
+        return sorted(list(rounds))
+
+    def execute_round(self, stage_num: int, active_imports: List[str]) -> List[str]:
+        """Executes and collects BigQuery job IDs for a given round/stage.
 
         Args:
-            active_imports: The list of active import names.
+            stage_num: The round/stage number to execute.
+            active_imports: List of active import dataset names.
 
         Returns:
-            A sorted list of unique active stage numbers.
+            List of BigQuery job IDs submitted.
         """
-        stages = set()
-        for config in self.aggregations:
-            step_type = config.get("type")
-            stage_num = config.get("stage", 1)
-
-            if config.get("disabled", False):
-                logging.info(f"[Config Scan] Skipping step '{step_type}' in Stage {stage_num} because it is disabled.")
+        job_ids = []
+        for single_import in active_imports:
+            jobs = self._dispatch_round_steps(single_import, stage_num)
+            for job in jobs:
+                if job and getattr(job, "job_id", None):
+                    job_ids.append(job.job_id)
+        return job_ids
+
+    def _execute_and_synchronize_round(self, single_import: str, round_num: int) -> None:
+        """Triggers round steps for a single import and blocks until completion."""
+        jobs = self._dispatch_round_steps(single_import, round_num)
+        job_ids = [job.job_id for job in jobs if job and getattr(job, "job_id", None)]
+
+        if not job_ids:
+            logging.info(f"No BigQuery jobs submitted for Round {round_num} (import: '{single_import}').")
+            return
+
+        logging.info(f"Submitted {len(job_ids)} job(s) for Round {round_num} (import: '{single_import}'): {job_ids}")
+        self._wait_for_jobs(job_ids)
+
+    def _dispatch_round_steps(self, single_import: str, round_num: int) -> List[Any]:
+        """Dispatches matching calculation steps for an import and round number."""
+        jobs = []
+        for calc in self.calculations:
+            if calc.get("round", 1) != round_num:
                 continue
-
-            applicable_imports = self._get_applicable_imports(config, active_imports)
-            if not applicable_imports:
-                logging.info(f"[Config Scan] Skipping step '{step_type}' in Stage {stage_num} because it does not apply to active imports: {active_imports}.")
+            if not self._calc_applies_to_import(calc, single_import):
                 continue
 
-            logging.info(f"[Config Scan] Step '{step_type}' in Stage {stage_num} is ACTIVE for imports: {applicable_imports}.")
-            stages.add(stage_num)
-        
-        sorted_stages = sorted(list(stages))
-        logging.info(f"[Config Scan] Active stages resolved: {sorted_stages}")
-        return sorted_stages
+            step_type = calc["type"]
+            logging.info(f"Triggering '{step_type}' (Round {round_num}) for import '{single_import}'...")
+
+            step_jobs = []
+            if step_type == "PLACE_AGGREGATION" or step_type == "place":
+                step_jobs = self._trigger_place(calc, [single_import])
+            elif step_type == "STAT_VAR_AGGREGATION" or step_type == "stat_var":
+                step_jobs = self._trigger_stat_var(calc, [single_import])
+            elif step_type == "linked_edges":
+                step_jobs = self._trigger_linked_edges(calc, [single_import])
+            elif step_type == "provenance_summary":
+                step_jobs = self._trigger_provenance_summary(calc, [single_import])
+            elif step_type == "stat_var_groups":
+                step_jobs = self._trigger_stat_var_groups(calc, [single_import])
+            else:
+                logging.warning(
+                    f"Calculation type '{step_type}' configured for import '{single_import}' has no active generator handler."
+                )
+
+            jobs.extend(step_jobs)
+        return jobs
 
-    def check_jobs_status(self, job_ids: List[str]) -> Dict[str, Any]:
-        """Checks the status of the specified BigQuery job IDs.
+    def _wait_for_jobs(self, job_ids: List[str], poll_interval: int = 5) -> None:
+        """Blocks until all specified BigQuery job IDs complete successfully.
 
-        Delegates to the BigQueryExecutor's get_jobs_status.
+        Args:
+            job_ids: List of BigQuery job IDs to wait for.
+            poll_interval: Seconds between polling checks.
+
+        Raises:
+            RuntimeError: If any job fails.
         """
-        try:
-            return self.executor.get_jobs_status(job_ids)
-        except Exception as e:
-            logging.error(f"Failed to check jobs status: {e}")
-            raise e
+        if not job_ids:
+            return
+
+        logging.info(f"Waiting for {len(job_ids)} BigQuery job(s) to complete: {job_ids}")
+        while True:
+            status_info = self.executor.get_jobs_status(job_ids)
+            status = status_info.get("status")
+
+            if status == "DONE":
+                logging.info("All BigQuery jobs in round completed successfully.")
+                return
+            elif status == "FAILED":
+                error_msg = status_info.get("error", "One or more BigQuery jobs failed.")
+                logging.error(f"Round execution failed: {error_msg}")
+                raise RuntimeError(f"Aggregation execution failed: {error_msg}")
+            
+            time.sleep(poll_interval)
 
     def _trigger_place(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
         """Triggers place-level rollup aggregations."""
-        source_type = config["source_type"]
-        destination_type = config["destination_type"]
-        logging.info(
-            f"  -> Place Rollup: {source_type} -> {destination_type} for imports {applicable_imports}"
-        )
+        place_cfg = config.get("place_aggregation", {})
+        from_type = place_cfg.get("from_place_types") or config.get("source_type")
+        to_type = place_cfg.get("to_place_types") or config.get("destination_type")
+
+        logging.info(f"  -> Place Rollup: {from_type} -> {to_type} for imports {applicable_imports}")
         generator = PlaceAggregationGenerator(self.executor, self.is_base_dc)
         job = generator.aggregate_places(
             import_names=applicable_imports,
-            source_type=source_type,
-            destination_type=destination_type,
-            allow_multiple_to_places=config.get("allow_multiple_to_places", False)
+            source_type=from_type,
+            destination_type=to_type,
+            allow_multiple_to_places=place_cfg.get("allow_multiple_to_places", False)
         )
         return [job] if job else []
 
     def _trigger_stat_var(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
         """Triggers statistical variable aggregations."""
-        ancestor_sv = config["ancestor_sv_id"]
-        source_svs = config["source_sv_ids"]
-        logging.info(
-            f"  -> Stat Var Aggregation: ancestor '{ancestor_sv}' (sources: {source_svs}) for imports {applicable_imports}"
-        )
+        stat_cfg = config.get("stat_var_aggregation", {})
+        aggregations = stat_cfg.get("aggregations", [])
+        output_import_name = config.get("output_import") or config.get("output_import_name")
+
+        # Backwards compatibility fallback for single item config
+        if not aggregations and "ancestor_sv_id" in config:
+            aggregations = [{
+                "ancestor_sv_id": config["ancestor_sv_id"],
+                "source_sv_ids": config["source_sv_ids"],
+                "skip_all_sources_present_check": config.get("skip_all_sources_present_check", False)
+            }]
+
         generator = StatVarAggregator(self.executor, self.is_base_dc)
-        return generator.aggregate_stat_vars(
-            ancestor_sv=ancestor_sv,
-            source_svs=source_svs,
-            import_names=applicable_imports,
-            output_import_name=config.get("output_import_name"),
-            skip_all_sources_present_check=config.get("skip_all_sources_present_check", False)
-        )
+        jobs = []
+
+        for item in aggregations:
+            ancestor_sv = item["ancestor_sv_id"]
+            source_svs = item["source_sv_ids"]
+            logging.info(
+                f"  -> Stat Var Aggregation: ancestor '{ancestor_sv}' (sources: {source_svs}) for imports {applicable_imports}"
+            )
+            item_jobs = generator.aggregate_stat_vars(
+                ancestor_sv=ancestor_sv,
+                source_svs=source_svs,
+                import_names=applicable_imports,
+                output_import_name=output_import_name,
+                skip_all_sources_present_check=item.get("skip_all_sources_present_check", False)
+            )
+            jobs.extend(item_jobs)
+
+        return jobs
 
     def _trigger_linked_edges(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
         """Triggers linked edge aggregations."""
@@ -233,13 +276,13 @@ def _trigger_stat_var_groups(self, config: Dict[str, Any], applicable_imports: L
         generator = StatVarGroupGenerator(self.executor, self.is_base_dc)
         return generator.run_all(applicable_imports)
 
-    def _get_applicable_imports(self, config: Dict[str, Any], active_imports: List[str]) -> List[str]:
-        """Determines which active imports apply to this aggregation config."""
-        configured_imports = config["imports"]
-        
-        # Explicit wildcard check
-        if "*" in configured_imports:
-            return active_imports
-            
-        # Intersection of configured and active imports
-        return list(set(configured_imports).intersection(active_imports))
+    def _calc_applies_to_import(self, calc: Dict[str, Any], single_import: str) -> bool:
+        """Determines if a calculation step applies to a single import."""
+        if calc.get("disabled", False):
+            return False
+
+        configured_imports = calc.get("input_imports") or calc.get("imports", [])
+        if "*" in configured_imports or single_import in configured_imports:
+            return True
+
+        return False
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index a0b2627f4..999ca8d36 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -14,7 +14,6 @@
 
 """Unit tests for the AggregationOrchestrator class."""
 
-import json
 import os
 import sys
 import tempfile
@@ -27,41 +26,38 @@
 from aggregation import AggregationOrchestrator
 
 VALID_CONFIG_YAML = textwrap.dedent("""\
-    aggregations:
-      - type: linked_edges
-        imports: ["*"]
-        stage: 1
-
-      - type: place
-        source_type: County
-        destination_type: State
-        allow_multiple_to_places: false
-        imports: ["USFed_Census"]
-        stage: 1
-
-      - type: place
-        source_type: State
-        destination_type: Country
-        imports: ["*"]
-        stage: 2
-        disabled: true
-
-      - type: stat_var
-        ancestor_sv_id: Count_Person
-        source_sv_ids: ["Count_Person_Male", "Count_Person_Female"]
-        skip_all_sources_present_check: true
-        imports: ["USFed_Census"]
-        stage: 2
+    calculations:
+      - type: PLACE_AGGREGATION
+        input_imports:
+          - USFed_Census
+        output_import: USFed_Census_AggState
+        round: 1
+        place_aggregation:
+          from_place_types: County
+          to_place_types: State
+
+      - type: STAT_VAR_AGGREGATION
+        input_imports:
+          - USFed_Census
+        output_import: USFed_Census_StatVarAgg
+        round: 2
+        stat_var_aggregation:
+          aggregations:
+            - ancestor_sv_id: Count_Person
+              source_sv_ids:
+                - Count_Person_Male
+                - Count_Person_Female
+              skip_all_sources_present_check: true
 """)
 
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
 class TestOrchestratorScanning(unittest.TestCase):
-    """Tests the stage scanning and active stage resolution methods."""
+    """Tests round scanning and active round resolution methods."""
 
     def setUp(self):
         self.tmpdir = tempfile.TemporaryDirectory()
-        config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        config_path = os.path.join(self.tmpdir.name, "config.yaml")
         with open(config_path, "w") as f:
             f.write(VALID_CONFIG_YAML)
 
@@ -76,41 +72,24 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_has_stage(self, mock_executor):
-        """Tests the has_stage method for active, disabled, and non-matching stages."""
-        self.assertTrue(self.orchestrator.has_stage(1, ["AnyImport"]))
-        self.assertTrue(self.orchestrator.has_stage(1, ["USFed_Census"]))
+    def test_get_active_rounds_for_import(self, mock_executor):
+        """Tests getting active rounds for matching and non-matching imports."""
+        rounds = self.orchestrator.get_active_rounds_for_import("USFed_Census")
+        self.assertEqual(rounds, [1, 2])
 
-        self.assertFalse(self.orchestrator.has_stage(2, ["OtherImport"]))
-        self.assertTrue(self.orchestrator.has_stage(2, ["USFed_Census"]))
-
-        self.assertFalse(self.orchestrator.has_stage(3, ["USFed_Census"]))
-
-    def test_get_active_stages(self, mock_executor):
-        """Tests that get_active_stages correctly extracts, filters, and sorts active stages."""
-        stages = self.orchestrator.get_active_stages(active_imports=["USFed_Census"])
-        self.assertEqual(stages, [1, 2])
-
-        stages = self.orchestrator.get_active_stages(active_imports=["OtherImport"])
-        self.assertEqual(stages, [1])
+        rounds = self.orchestrator.get_active_rounds_for_import("OtherImport")
+        self.assertEqual(rounds, [])
 
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
 @patch('aggregation.orchestrator.PlaceAggregationGenerator')
 @patch('aggregation.orchestrator.StatVarAggregator')
-@patch('aggregation.orchestrator.LinkedEdgeGenerator')
-@patch('aggregation.orchestrator.ProvenanceSummaryGenerator')
-@patch('aggregation.orchestrator.StatVarGroupGenerator')
 class TestOrchestratorExecution(unittest.TestCase):
-    """Tests stage execution, verifying parallel job submission and routing.
-
-    These tests execute stages, so they mock the executor and all five generators
-    to verify correct parameters are passed and jobs are collected.
-    """
+    """Tests round execution, verifying job submission and synchronization."""
 
     def setUp(self):
         self.tmpdir = tempfile.TemporaryDirectory()
-        config_path = os.path.join(self.tmpdir.name, "aggregation.yaml")
+        config_path = os.path.join(self.tmpdir.name, "config.yaml")
         with open(config_path, "w") as f:
             f.write(VALID_CONFIG_YAML)
 
@@ -125,25 +104,21 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_execute_stage_1(self, mock_svg_gen, mock_prov_gen, mock_edge_gen, 
-                             mock_sv_agg, mock_place_gen, mock_executor):
-        """Tests executing Stage 1, verifying parallel job submission and wildcard resolution."""
+    def test_run_synchronized_pipeline(self, mock_sv_agg, mock_place_gen, mock_executor_cls):
+        """Tests complete synchronized run pipeline for an import across rounds."""
         mock_job1 = MagicMock()
-        mock_job1.job_id = "job-edge-1"
-        mock_edge_gen.return_value.run_all.return_value = [mock_job1]
+        mock_job1.job_id = "job-place-1"
+        mock_place_gen.return_value.aggregate_places.return_value = mock_job1
 
         mock_job2 = MagicMock()
-        mock_job2.job_id = "job-place-1"
-        mock_place_gen.return_value.aggregate_places.return_value = mock_job2
+        mock_job2.job_id = "job-sv-1"
+        mock_sv_agg.return_value.aggregate_stat_vars.return_value = [mock_job2]
 
-        job_ids = self.orchestrator.execute_stage(stage_num=1, active_imports=["USFed_Census"])
+        self.orchestrator.executor = MagicMock()
+        self.orchestrator.executor.get_jobs_status.return_value = {"status": "DONE"}
 
-        self.assertEqual(len(job_ids), 2)
-        self.assertIn("job-edge-1", job_ids)
-        self.assertIn("job-place-1", job_ids)
+        self.orchestrator.run(active_imports=["USFed_Census"])
 
-        mock_edge_gen.return_value.run_all.assert_called_once_with(["USFed_Census"])
-        
         mock_place_gen.return_value.aggregate_places.assert_called_once_with(
             import_names=["USFed_Census"],
             source_type="County",
@@ -151,58 +126,14 @@ def test_execute_stage_1(self, mock_svg_gen, mock_prov_gen, mock_edge_gen,
             allow_multiple_to_places=False
         )
 
-    def test_execute_stage_2_with_disabled_and_filtering(self, mock_svg_gen, mock_prov_gen, mock_edge_gen, 
-                                                         mock_sv_agg, mock_place_gen, mock_executor):
-        """Tests Stage 2, verifying that disabled steps are skipped and non-matching imports are filtered."""
-        mock_job_sv = MagicMock()
-        mock_job_sv.job_id = "job-sv-1"
-        mock_sv_agg.return_value.aggregate_stat_vars.return_value = [mock_job_sv]
-
-        job_ids = self.orchestrator.execute_stage(stage_num=2, active_imports=["OtherImport"])
-        self.assertEqual(len(job_ids), 0)
-        mock_place_gen.return_value.aggregate_places.assert_not_called()
-        mock_sv_agg.return_value.aggregate_stat_vars.assert_not_called()
-
-        job_ids = self.orchestrator.execute_stage(stage_num=2, active_imports=["USFed_Census"])
-        
-        self.assertEqual(job_ids, ["job-sv-1"])
-        mock_place_gen.return_value.aggregate_places.assert_not_called()
         mock_sv_agg.return_value.aggregate_stat_vars.assert_called_once_with(
             ancestor_sv="Count_Person",
             source_svs=["Count_Person_Male", "Count_Person_Female"],
             import_names=["USFed_Census"],
-            output_import_name=None,
+            output_import_name="USFed_Census_StatVarAgg",
             skip_all_sources_present_check=True
         )
 
-    def test_execute_stage_unsupported_type(self, *mocks):
-        """Tests that an unsupported aggregation step type raises ValueError."""
-        unimplemented_config = textwrap.dedent("""\
-            aggregations:
-              - type: entity
-                entity_types: ["MortalityEvent"]
-                location_props: ["location"]
-                imports: ["*"]
-                stage: 1
-        """)
-        
-        with tempfile.TemporaryDirectory() as local_tmpdir:
-            local_config_path = os.path.join(local_tmpdir, "aggregation.yaml")
-            with open(local_config_path, "w") as f:
-                f.write(unimplemented_config)
-            
-            local_orchestrator = AggregationOrchestrator(
-                connection_id="conn",
-                project_id="proj",
-                instance_id="inst",
-                database_id="db",
-                config_file_path=local_config_path
-            )
-            
-            with self.assertRaises(ValueError) as ctx:
-                local_orchestrator.execute_stage(stage_num=1, active_imports=["USFed_Census"])
-            self.assertIn("Unsupported or unimplemented aggregation step type: entity", str(ctx.exception))
-
 
 if __name__ == '__main__':
     unittest.main()

From 969f1955f2ec83930872a91d0f227526bd0c7dd8 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 19:41:48 +0530
Subject: [PATCH 23/33] refactor(aggregation): rename round to stage across
 configs, schema, orchestrator, and tests

---
 .../aggregation/configs/place.yaml            | 34 ++++-----
 .../aggregation/configs/statvar_series.yaml   | 20 +++---
 .../aggregation/orchestrator.py               | 72 +++++++++----------
 .../aggregation/orchestrator_test.py          | 22 +++---
 .../ingestion-helper/aggregation/schema.json  |  2 +-
 .../aggregation/validator_test.py             |  4 +-
 6 files changed, 77 insertions(+), 77 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
index 892a2b128..cf90fb7bb 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
@@ -5,7 +5,7 @@ calculations:
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    round: 1
+    stage: 1
     output_import: CensusACS5YearSurvey_AggCountry
 
   - type: PLACE_AGGREGATION
@@ -14,7 +14,7 @@ calculations:
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    round: 1
+    stage: 1
     output_import: CensusSAHIE_AggCountry
 
   - type: PLACE_AGGREGATION
@@ -23,7 +23,7 @@ calculations:
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    round: 1
+    stage: 1
     output_import: CDCMortality_AggState
 
   - type: PLACE_AGGREGATION
@@ -32,7 +32,7 @@ calculations:
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    round: 2
+    stage: 2
     output_import: CDCMortality_AggState_AggCountry
 
   # - type: PLACE_AGGREGATION
@@ -41,7 +41,7 @@ calculations:
   #   place_aggregation:
   #     from_place_types: State
   #     to_place_types: Country
-  #   round: 1
+  #   stage: 1
   #   output_import: FBIGovCrime_AggCountry
 
   - type: PLACE_AGGREGATION
@@ -50,7 +50,7 @@ calculations:
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    round: 1
+    stage: 1
     output_import: DEA_ARCOS_AggState
 
   - type: PLACE_AGGREGATION
@@ -59,7 +59,7 @@ calculations:
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    round: 2
+    stage: 2
     output_import: DEA_ARCOS_AggState_AggCountry
 
   - type: PLACE_AGGREGATION
@@ -68,7 +68,7 @@ calculations:
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
-    round: 1
+    stage: 1
     output_import: EPA_EJSCREEN_AggCensusTract
 
   - type: PLACE_AGGREGATION
@@ -77,7 +77,7 @@ calculations:
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
-    round: 2
+    stage: 2
     output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
 
   - type: PLACE_AGGREGATION
@@ -86,7 +86,7 @@ calculations:
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
-    round: 1
+    stage: 1
     output_import: DeepSolar_AggCensusTract
 
   - type: PLACE_AGGREGATION
@@ -95,7 +95,7 @@ calculations:
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
-    round: 2
+    stage: 2
     output_import: DeepSolar_AggCensusTract_AggCounty
 
   - type: PLACE_AGGREGATION
@@ -104,7 +104,7 @@ calculations:
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: County
-    round: 1
+    stage: 1
     output_import: EPA_GHGRP_AggCounty
 
   - type: PLACE_AGGREGATION
@@ -113,7 +113,7 @@ calculations:
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: CensusZipCodeTabulationArea
-    round: 1
+    stage: 1
     output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
 
   - type: PLACE_AGGREGATION
@@ -122,7 +122,7 @@ calculations:
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    round: 2
+    stage: 2
     output_import: EPA_GHGRP_AggCounty_AggState
 
   - type: PLACE_AGGREGATION
@@ -132,7 +132,7 @@ calculations:
       from_place_types: GeoGridPlace_0.25Deg
       to_place_types: County
       allow_multiple_to_places: true
-    round: 1
+    stage: 1
     output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
 
   - type: PLACE_AGGREGATION
@@ -142,7 +142,7 @@ calculations:
       from_place_types: State
       to_place_types: Country
       allow_multiple_to_places: true
-    round: 1
+    stage: 1
     output_import: India_RBIStateDomesticProduct_AggCountry
 
   - type: PLACE_AGGREGATION
@@ -152,5 +152,5 @@ calculations:
       from_place_types: State
       to_place_types: Country
       allow_multiple_to_places: true
-    round: 1
+    stage: 1
     output_import: India_RBIStateDomesticProduct_StatVarAgg_AggCountry
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
index 4359a8f93..c565076d9 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -2,7 +2,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXDCP30
-    round: 1
+    stage: 1
     output_import: NASA_NEXDCP30_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -16,7 +16,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Subnational
-    round: 1
+    stage: 1
     output_import: NASA_NEXGDDP_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -32,7 +32,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Country
-    round: 1
+    stage: 1
     output_import: NASA_NEXGDDP_Country_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -46,7 +46,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
-    round: 1
+    stage: 1
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -62,7 +62,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
-    round: 1
+    stage: 1
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -78,7 +78,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
-    round: 2
+    stage: 2
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -92,7 +92,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
-    round: 2
+    stage: 2
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -106,7 +106,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
-    round: 3
+    stage: 3
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrOverTimeOnStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -209,7 +209,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
-    round: 3
+    stage: 3
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrOverTimeOnStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -312,7 +312,7 @@ calculations:
   - type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_IpccPlaces50
-    round: 1
+    stage: 1
     output_import: NASA_NEXGDDP_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index e2c20c575..0f104ab49 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -29,7 +29,7 @@
 
 
 class AggregationOrchestrator:
-    """Orchestrates the overall aggregation workflow across multi-round execution."""
+    """Orchestrates the overall aggregation workflow across multi-stage execution."""
 
     def __init__(
         self,
@@ -75,8 +75,8 @@ def __init__(
     def run(self, active_imports: List[str]) -> None:
         """Executes aggregations independently for each active import.
 
-        Blocks and synchronizes round progression for each import:
-        Round 1 -> Wait -> Round 2 -> Wait -> Round 3 -> Wait.
+        Blocks and synchronizes stage progression for each import:
+        Stage 1 -> Wait -> Stage 2 -> Wait -> Stage 3 -> Wait.
 
         Args:
             active_imports: List of active import dataset names to process.
@@ -85,45 +85,45 @@ def run(self, active_imports: List[str]) -> None:
 
         for single_import in active_imports:
             logging.info(f"=== Starting Aggregation Pipeline for Import: '{single_import}' ===")
-            active_rounds = self.get_active_rounds_for_import(single_import)
+            active_stages = self.get_active_stages_for_import(single_import)
 
-            if not active_rounds:
+            if not active_stages:
                 logging.info(f"No aggregation steps configured for import '{single_import}'. Skipping.")
                 continue
 
-            for round_num in active_rounds:
-                logging.info(f"--- Triggering Round {round_num} for import '{single_import}' ---")
-                self._execute_and_synchronize_round(single_import, round_num)
+            for stage_num in active_stages:
+                logging.info(f"--- Triggering Stage {stage_num} for import '{single_import}' ---")
+                self._execute_and_synchronize_stage(single_import, stage_num)
 
-            logging.info(f"=== Successfully completed all aggregation rounds for Import: '{single_import}' ===")
+            logging.info(f"=== Successfully completed all aggregation stages for Import: '{single_import}' ===")
 
-    def get_active_rounds_for_import(self, single_import: str) -> List[int]:
-        """Returns a sorted list of unique active round numbers for a single import.
+    def get_active_stages_for_import(self, single_import: str) -> List[int]:
+        """Returns a sorted list of unique active stage numbers for a single import.
 
         Args:
             single_import: The active import dataset name.
 
         Returns:
-            Sorted list of round numbers (e.g., [1, 2, 3]).
+            Sorted list of stage numbers (e.g., [1, 2, 3]).
         """
-        rounds = set()
+        stages = set()
         for calc in self.calculations:
             if self._calc_applies_to_import(calc, single_import):
-                rounds.add(calc.get("round", 1))
-        return sorted(list(rounds))
+                stages.add(calc.get("stage", 1))
+        return sorted(list(stages))
 
-    def get_active_rounds(self, active_imports: List[str]) -> List[int]:
-        """Returns a sorted list of unique active round numbers across active imports."""
-        rounds = set()
+    def get_active_stages(self, active_imports: List[str]) -> List[int]:
+        """Returns a sorted list of unique active stage numbers across active imports."""
+        stages = set()
         for single_import in active_imports:
-            rounds.update(self.get_active_rounds_for_import(single_import))
-        return sorted(list(rounds))
+            stages.update(self.get_active_stages_for_import(single_import))
+        return sorted(list(stages))
 
-    def execute_round(self, stage_num: int, active_imports: List[str]) -> List[str]:
-        """Executes and collects BigQuery job IDs for a given round/stage.
+    def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
+        """Executes and collects BigQuery job IDs for a given stage.
 
         Args:
-            stage_num: The round/stage number to execute.
+            stage_num: The stage number to execute.
             active_imports: List of active import dataset names.
 
         Returns:
@@ -131,35 +131,35 @@ def execute_round(self, stage_num: int, active_imports: List[str]) -> List[str]:
         """
         job_ids = []
         for single_import in active_imports:
-            jobs = self._dispatch_round_steps(single_import, stage_num)
+            jobs = self._dispatch_stage_steps(single_import, stage_num)
             for job in jobs:
                 if job and getattr(job, "job_id", None):
                     job_ids.append(job.job_id)
         return job_ids
 
-    def _execute_and_synchronize_round(self, single_import: str, round_num: int) -> None:
-        """Triggers round steps for a single import and blocks until completion."""
-        jobs = self._dispatch_round_steps(single_import, round_num)
+    def _execute_and_synchronize_stage(self, single_import: str, stage_num: int) -> None:
+        """Triggers stage steps for a single import and blocks until completion."""
+        jobs = self._dispatch_stage_steps(single_import, stage_num)
         job_ids = [job.job_id for job in jobs if job and getattr(job, "job_id", None)]
 
         if not job_ids:
-            logging.info(f"No BigQuery jobs submitted for Round {round_num} (import: '{single_import}').")
+            logging.info(f"No BigQuery jobs submitted for Stage {stage_num} (import: '{single_import}').")
             return
 
-        logging.info(f"Submitted {len(job_ids)} job(s) for Round {round_num} (import: '{single_import}'): {job_ids}")
+        logging.info(f"Submitted {len(job_ids)} job(s) for Stage {stage_num} (import: '{single_import}'): {job_ids}")
         self._wait_for_jobs(job_ids)
 
-    def _dispatch_round_steps(self, single_import: str, round_num: int) -> List[Any]:
-        """Dispatches matching calculation steps for an import and round number."""
+    def _dispatch_stage_steps(self, single_import: str, stage_num: int) -> List[Any]:
+        """Dispatches matching calculation steps for an import and stage number."""
         jobs = []
         for calc in self.calculations:
-            if calc.get("round", 1) != round_num:
+            if calc.get("stage", 1) != stage_num:
                 continue
             if not self._calc_applies_to_import(calc, single_import):
                 continue
 
             step_type = calc["type"]
-            logging.info(f"Triggering '{step_type}' (Round {round_num}) for import '{single_import}'...")
+            logging.info(f"Triggering '{step_type}' (Stage {stage_num}) for import '{single_import}'...")
 
             step_jobs = []
             if step_type == "PLACE_AGGREGATION" or step_type == "place":
@@ -199,13 +199,13 @@ def _wait_for_jobs(self, job_ids: List[str], poll_interval: int = 5) -> None:
             status = status_info.get("status")
 
             if status == "DONE":
-                logging.info("All BigQuery jobs in round completed successfully.")
+                logging.info("All BigQuery jobs in stage completed successfully.")
                 return
             elif status == "FAILED":
                 error_msg = status_info.get("error", "One or more BigQuery jobs failed.")
-                logging.error(f"Round execution failed: {error_msg}")
+                logging.error(f"Stage execution failed: {error_msg}")
                 raise RuntimeError(f"Aggregation execution failed: {error_msg}")
-            
+
             time.sleep(poll_interval)
 
     def _trigger_place(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 999ca8d36..c724248d2 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -31,7 +31,7 @@
         input_imports:
           - USFed_Census
         output_import: USFed_Census_AggState
-        round: 1
+        stage: 1
         place_aggregation:
           from_place_types: County
           to_place_types: State
@@ -40,7 +40,7 @@
         input_imports:
           - USFed_Census
         output_import: USFed_Census_StatVarAgg
-        round: 2
+        stage: 2
         stat_var_aggregation:
           aggregations:
             - ancestor_sv_id: Count_Person
@@ -53,7 +53,7 @@
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
 class TestOrchestratorScanning(unittest.TestCase):
-    """Tests round scanning and active round resolution methods."""
+    """Tests stage scanning and active stage resolution methods."""
 
     def setUp(self):
         self.tmpdir = tempfile.TemporaryDirectory()
@@ -72,20 +72,20 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_get_active_rounds_for_import(self, mock_executor):
-        """Tests getting active rounds for matching and non-matching imports."""
-        rounds = self.orchestrator.get_active_rounds_for_import("USFed_Census")
-        self.assertEqual(rounds, [1, 2])
+    def test_get_active_stages_for_import(self, mock_executor):
+        """Tests getting active stages for matching and non-matching imports."""
+        stages = self.orchestrator.get_active_stages_for_import("USFed_Census")
+        self.assertEqual(stages, [1, 2])
 
-        rounds = self.orchestrator.get_active_rounds_for_import("OtherImport")
-        self.assertEqual(rounds, [])
+        stages = self.orchestrator.get_active_stages_for_import("OtherImport")
+        self.assertEqual(stages, [])
 
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
 @patch('aggregation.orchestrator.PlaceAggregationGenerator')
 @patch('aggregation.orchestrator.StatVarAggregator')
 class TestOrchestratorExecution(unittest.TestCase):
-    """Tests round execution, verifying job submission and synchronization."""
+    """Tests stage execution, verifying job submission and synchronization."""
 
     def setUp(self):
         self.tmpdir = tempfile.TemporaryDirectory()
@@ -105,7 +105,7 @@ def tearDown(self):
         self.tmpdir.cleanup()
 
     def test_run_synchronized_pipeline(self, mock_sv_agg, mock_place_gen, mock_executor_cls):
-        """Tests complete synchronized run pipeline for an import across rounds."""
+        """Tests complete synchronized run pipeline for an import across stages."""
         mock_job1 = MagicMock()
         mock_job1.job_id = "job-place-1"
         mock_place_gen.return_value.aggregate_places.return_value = mock_job1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index f6cad1f71..2d66ff0a0 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -28,7 +28,7 @@
             "minItems": 1
           },
           "output_import": { "type": "string" },
-          "round": { "type": "integer" },
+          "stage": { "type": "integer" },
           "place_aggregation": { "type": "object" },
           "stat_var_aggregation": { "type": "object" },
           "entity_aggregation": { "type": "object" },
diff --git a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
index 35f94ef04..01b01d4eb 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/validator_test.py
@@ -46,7 +46,7 @@ def test_validate_config_success_all_types(self):
                 input_imports:
                   - CensusACS5YearSurvey
                 output_import: CensusACS5YearSurvey_AggCountry
-                round: 1
+                stage: 1
                 place_aggregation:
                   from_place_types: State
                   to_place_types: Country
@@ -77,7 +77,7 @@ def test_validate_config_success_all_types(self):
               - type: STAT_VAR_SERIES_AGGREGATION
                 input_imports:
                   - NASA_NEXDCP30
-                round: 1
+                stage: 1
                 output_import: NASA_NEXDCP30_AggrDiffStats
                 stat_var_series_aggregation:
                   aggr_funcs:

From cb0e2da0339296b0bc5523036b5f2eb34df06148 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 19:53:06 +0530
Subject: [PATCH 24/33] feat(aggregation): add common.yaml config, remove
 legacy aggregation.yaml, and update README

---
 .../ingestion-helper/aggregation.yaml         |  17 ---
 .../ingestion-helper/aggregation/README.md    | 138 ++++++------------
 .../aggregation/configs/common.yaml           |  18 +++
 .../aggregation/orchestrator.py               |  10 +-
 .../ingestion-helper/aggregation/schema.json  |   5 +-
 5 files changed, 73 insertions(+), 115 deletions(-)
 delete mode 100644 pipeline/workflow/ingestion-helper/aggregation.yaml
 create mode 100644 pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml

diff --git a/pipeline/workflow/ingestion-helper/aggregation.yaml b/pipeline/workflow/ingestion-helper/aggregation.yaml
deleted file mode 100644
index b8786aa51..000000000
--- a/pipeline/workflow/ingestion-helper/aggregation.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-# Data Commons Aggregation Configuration. See the README for details.
-
-aggregations:
-  # Generates linkedContainedInPlace, linkedMemberOf, etc.
-  - type: linked_edges
-    imports: ["*"]
-    stage: 1
-
-  # Generates summary statistics in the Cache table
-  - type: provenance_summary
-    imports: ["*"]
-    stage: 1
-
-  # Generates the Statistical Variable hierarchy/verticals
-  - type: stat_var_groups
-    imports: ["*"]
-    stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/README.md b/pipeline/workflow/ingestion-helper/aggregation/README.md
index ec61e5a40..53e96feda 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/README.md
+++ b/pipeline/workflow/ingestion-helper/aggregation/README.md
@@ -1,136 +1,88 @@
 # Aggregations
 
-This module orchestrates the execution of Data Commons aggregations through BigQuery Federation. The aggregations include place rollups, statistical variable aggregations, linked edges, and metadata summaries.
+This module orchestrates the execution of Data Commons aggregations through BigQuery Federation. The aggregations include place rollups, statistical variable aggregations, entity aggregations, linked edges, and metadata summaries.
 
 ## Core Concepts
 
 *   **Sequential Stages**: Aggregations are executed sequentially by their `stage` number (e.g., Stage 1 steps are guaranteed to complete before Stage 2 steps begin). This is useful when later steps depend on the output of earlier ones.
 *   **Parallel Execution**: All aggregation steps configured in the same stage are executed in parallel to maximize performance.
+*   **Per-Import Isolation**: Aggregations run independently for each active import dataset.
 
 ---
 
-## Configuration Guide (`aggregation.yaml`)
+## Configuration Guide (`configs/*.yaml`)
 
-The entire aggregation pipeline is configured via `aggregation.yaml`. This file defines which aggregations run, what their dependencies are, and in what order they execute.
+The aggregation pipeline is configured via YAML files in the `configs/` directory (`place.yaml`, `statvar.yaml`, `common.yaml`, etc.). Each file defines a top-level `calculations:` list.
 
 ### Common Configuration Fields
-Every step in the configuration supports these common fields:
-*   `type` (string, Required): The type of aggregation step to run.
+Every calculation step supports these common fields:
+*   `type` (string, Required): The type of calculation step (e.g., `PLACE_AGGREGATION`, `STAT_VAR_AGGREGATION`, `LINKED_EDGES`).
 *   `stage` (integer, Optional, default: 1): The sequential stage number. Steps in lower stages are guaranteed to finish before higher stages start.
-*   `imports` (list of strings, Required): The list of import names this step applies to. Use `["*"]` (wildcard) to apply the step to **all** imports in the current run.
+*   `input_imports` (list of strings, Required): The list of import names this step applies to. Use `["*"]` (wildcard) to apply the step to **all** active imports.
+*   `output_import` (string, Optional): The output import dataset name to write aggregated observations under.
 *   `disabled` (boolean, Optional, default: false): Set to `true` to temporarily disable a step without deleting it.
 
 ---
 
-### Supported Aggregation Types
+### Supported Calculation Types
 
-#### 1. Place (`place`)
+#### 1. Place Aggregation (`PLACE_AGGREGATION`)
 Aggregates and rolls up statistical data from a smaller place type (source) to a larger place type (destination).
 *   **Fields**:
-    *   `source_type` (string, Required): The source place type (e.g., `County`).
-    *   `destination_type` (string, Required): The destination place type (e.g., `State`).
+    *   `from_place_types` (string, Required): The source place type (e.g., `State`).
+    *   `to_place_types` (string, Required): The destination place type (e.g., `Country`).
     *   `allow_multiple_to_places` (boolean, Optional, default: false): Allows mapping to multiple parent places if true.
 *   **Example**:
     ```yaml
-    - type: place
+    - type: PLACE_AGGREGATION
       stage: 1
-      imports: ["USFed_Census"]
-      source_type: County
-      destination_type: State
+      input_imports:
+        - CensusACS5YearSurvey
+      output_import: CensusACS5YearSurvey_AggCountry
+      place_aggregation:
+        from_place_types: State
+        to_place_types: Country
     ```
 
-#### 2. Statistical Variable Aggregation (`stat_var`)
-Aggregates raw statistical variables into a summarized ancestor variable (e.g., summing up individual age group counts to get a total population count).
+#### 2. Statistical Variable Aggregation (`STAT_VAR_AGGREGATION`)
+Aggregates raw statistical variables into a summarized ancestor variable (e.g., summing `Count_Person_Male` and `Count_Person_Female` into `Count_Person`).
 *   **Fields**:
     *   `ancestor_sv_id` (string, Required): The ID of the parent/summary statistical variable (e.g., `Count_Person`).
     *   `source_sv_ids` (list of strings, Required): The list of individual statistical variables to sum up.
-    *   `output_import_name` (string, Optional): Custom import name to write output under.
     *   `skip_all_sources_present_check` (boolean, Optional, default: false): If true, aggregates even if some source variables are missing.
 *   **Example**:
     ```yaml
-    - type: stat_var
+    - type: STAT_VAR_AGGREGATION
       stage: 2
-      imports: ["USFed_Census"]
-      ancestor_sv_id: Count_Person
-      source_sv_ids:
-        - Count_Person_Male
-        - Count_Person_Female
+      input_imports:
+        - CensusACS5YearSurvey
+      output_import: CensusACS5YearSurvey_StatVarAgg
+      stat_var_aggregation:
+        aggregations:
+          - ancestor_sv_id: Count_Person
+            source_sv_ids:
+              - Count_Person_Male
+              - Count_Person_Female
     ```
 
-#### 3. Linked Edges (`linked_edges`)
-Constructs and aggregates structural graph links (edges) between nodes in the Data Commons graph.
+#### 3. Common Aggregations (`LINKED_EDGES`, `PROVENANCE_SUMMARY`, `STAT_VAR_GROUPS`)
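+These steps generate linked graph edges (e.g., `linkedContainedInPlace`, `linkedMemberOf`), provenance summaries, and the statistical variable group hierarchy. They are defined in `configs/common.yaml`.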
 *   **Example**:
     ```yaml
-    - type: linked_edges
+    - type: LINKED_EDGES
       stage: 1
-      imports: ["*"] # Runs for all imports
+      input_imports:
+        - "*"
     ```
 
-#### 4. Provenance Summary (`provenance_summary`)
-Generates metadata and provenance summaries for all aggregated statistical observations, establishing data lineage.
-*   **Example**:
-    ```yaml
-    - type: provenance_summary
-      stage: 3
-      imports: ["USFed_Census"]
-    ```
-
-#### 5. Statistical Variable Groups (`stat_var_groups`)
-Aggregates and structures statistical variables into hierarchical groups for display in the Data Commons UI.
-*   **Example**:
-    ```yaml
-    - type: stat_var_groups
-      stage: 3
-      imports: ["*"]
-    ```
-
----
-
-### Example `aggregation.yaml`
-
-This example demonstrates a typical multi-stage aggregation workflow.
-
-```yaml
-# aggregation.yaml
-aggregations:
-  # Stage 1: Parallel Place Rollups and Linked Edges
-  - type: linked_edges
-    stage: 1
-    imports: ["*"]
-
-  - type: place
-    stage: 1
-    imports: ["USFed_Census"]
-    source_type: County
-    destination_type: State
-
-  # Stage 2: Parallel Stat Var Aggregations (Depends on Stage 1 completing)
-  - type: stat_var
-    stage: 2
-    imports: ["USFed_Census"]
-    ancestor_sv_id: Count_Person
-    source_sv_ids:
-      - Count_Person_Male
-      - Count_Person_Female
-
-  # Stage 3: Metadata and UI Summaries (Depends on Stage 2 completing)
-  - type: provenance_summary
-    stage: 3
-    imports: ["USFed_Census"]
-
-  - type: stat_var_groups
-    stage: 3
-    imports: ["*"]
-```
-
 ---
 
 ## Local Configuration Validation
 
-The orchestrator strictly validates the `aggregation.yaml` file on startup against a strict JSON Schema (`schema.json`). If there is any syntax error, type mismatch, or missing required field, the service will fail to start.
+The orchestrator strictly validates configuration files against `schema.json`. If there is any syntax error, type mismatch, or missing required field, validation will fail.
 
 ### Running the Validator Locally
-You can validate your `aggregation.yaml` file locally using the built-in CLI tool before committing or deploying changes.
+You can validate all configuration files locally using the built-in CLI tool:
 
 1.  **Navigate to the ingestion-helper root**:
     ```bash
@@ -138,10 +90,12 @@ You can validate your `aggregation.yaml` file locally using the built-in CLI too
     ```
 2.  **Run the validator**:
     ```bash
-    python3 -m aggregation.validator --config ../aggregation.yaml
-
-    # sample output...
-    # Validating 'aggregation.yaml' against 'schema.json'...
-    # [SUCCESS] Configuration is valid!
-    # Parsed 5 aggregation steps successfully.
+    python3 -m aggregation.validator
+
+    # Sample output (abridged; per-file lines for the other 4 files are omitted):
+    # Validating 7 configuration file(s) in 'aggregation/configs' against 'schema.json'...
+    #   ✓ common.yaml (3 calculation steps)
+    #   ✓ place.yaml (16 calculation steps)
+    #   ✓ statvar.yaml (21 calculation steps)
+    # [SUCCESS] All 7 configuration file(s) passed validation! (64 calculation steps total)
     ```
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
new file mode 100644
index 000000000..59986da18
--- /dev/null
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
@@ -0,0 +1,18 @@
+calculations:
+  # Generates linkedContainedInPlace, linkedMemberOf, etc.
+  - type: LINKED_EDGES
+    input_imports:
+      - "*"
+    stage: 1
+
+  # Generates summary statistics in the Cache table
+  - type: PROVENANCE_SUMMARY
+    input_imports:
+      - "*"
+    stage: 1
+
+  # Generates the Statistical Variable hierarchy/verticals
+  - type: STAT_VAR_GROUPS
+    input_imports:
+      - "*"
+    stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 0f104ab49..33e64c7f3 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -162,15 +162,15 @@ def _dispatch_stage_steps(self, single_import: str, stage_num: int) -> List[Any]
             logging.info(f"Triggering '{step_type}' (Stage {stage_num}) for import '{single_import}'...")
 
             step_jobs = []
-            if step_type == "PLACE_AGGREGATION" or step_type == "place":
+            if step_type == "PLACE_AGGREGATION":
                 step_jobs = self._trigger_place(calc, [single_import])
-            elif step_type == "STAT_VAR_AGGREGATION" or step_type == "stat_var":
+            elif step_type == "STAT_VAR_AGGREGATION":
                 step_jobs = self._trigger_stat_var(calc, [single_import])
-            elif step_type == "linked_edges":
+            elif step_type == "LINKED_EDGES":
                 step_jobs = self._trigger_linked_edges(calc, [single_import])
-            elif step_type == "provenance_summary":
+            elif step_type == "PROVENANCE_SUMMARY":
                 step_jobs = self._trigger_provenance_summary(calc, [single_import])
-            elif step_type == "stat_var_groups":
+            elif step_type == "STAT_VAR_GROUPS":
                 step_jobs = self._trigger_stat_var_groups(calc, [single_import])
             else:
                 logging.warning(
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index 2d66ff0a0..8c0e85b8b 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -19,7 +19,10 @@
               "ENTITY_AGGREGATION",
               "STAT_VAR_SERIES_AGGREGATION",
               "STAT_VAR_CALCULATION",
-              "SUPER_ENUM_AGGREGATION"
+              "SUPER_ENUM_AGGREGATION",
+              "LINKED_EDGES",
+              "PROVENANCE_SUMMARY",
+              "STAT_VAR_GROUPS"
             ]
           },
           "input_imports": {

From 78d4418c6431d218fd436b619d375b304d09dca8 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 20:23:12 +0530
Subject: [PATCH 25/33] revert(workflow): restore
 spanner-ingestion-workflow.yaml to origin/master

---
 .../workflow/spanner-ingestion-workflow.yaml  | 48 +++++++------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/pipeline/workflow/spanner-ingestion-workflow.yaml b/pipeline/workflow/spanner-ingestion-workflow.yaml
index 4bc2d5c6e..d0a878067 100644
--- a/pipeline/workflow/spanner-ingestion-workflow.yaml
+++ b/pipeline/workflow/spanner-ingestion-workflow.yaml
@@ -121,52 +121,38 @@ main:
 run_aggregation_job:
   params: [import_list, helper_url]
   steps:
-    # 1. Start the aggregation and get the initial state
-    - initiate_aggregation:
+    - run_aggregation:
         call: http.post
         args:
-          url: ${helper_url + "/aggregation/initiate"}
+          url: ${helper_url + "/aggregation/run"}
           timeout: 300
           auth:
             type: OIDC
           body:
             importList: ${import_list}
-        result: initiate_response
-    # 2. Store the state in a workflow variable
-    - assign_state:
-        assign:
-          - state: ${initiate_response.body}
-    # 3. State Check loop
-    - check_status_loop:
-        switch:
-          # Exit successfully if done
-          - condition: ${state.status == "SUCCEEDED"}
-            return: "OK"
-          # Raise error if failed
-          - condition: ${state.status == "FAILED"}
-            raise: ${state.error}
-        next: poll_and_wait
-    # 4. Sleep and Poll
-    - poll_and_wait:
+        result: aggregation_response
+    - check_aggregation_status_loop:
         steps:
-          - wait_step:
+          - wait_for_aggregation:
               call: sys.sleep
               args:
                 seconds: 300
-          # Pass the state back to the server, get the new state
-          - poll_server:
+          - check_aggregation_status:
               call: http.post
               args:
-                url: ${helper_url + "/aggregation/poll"}
+                url: ${helper_url + "/aggregation/status"}
                 auth:
                   type: OIDC
-                body: ${state}
-              result: poll_response
-          - update_state:
-              assign:
-                - state: ${poll_response.body}
-        next: check_status_loop
-
+                body:
+                  jobIds: ${aggregation_response.body.jobIds}
+              result: aggregation_status_response
+          - evaluate_aggregation_status:
+              switch:
+                - condition: ${aggregation_status_response.body.status == "DONE"}
+                  return: 'OK'
+                - condition: ${aggregation_status_response.body.status == "FAILED"}
+                  raise: ${aggregation_status_response.body.error}
+              next: check_aggregation_status_loop
 
 # This sub-workflow launches a Dataflow job and waits for it to complete.
 run_dataflow_job:

From dc008b6d5ac2e66562aa006847a8be42c79bd910 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 20:24:48 +0530
Subject: [PATCH 26/33] revert(routes): restore routes/aggregation.py to
 origin/master

---
 .../ingestion-helper/routes/aggregation.py    | 215 +++---------------
 1 file changed, 35 insertions(+), 180 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/routes/aggregation.py b/pipeline/workflow/ingestion-helper/routes/aggregation.py
index f15e8a4e1..7433f906d 100644
--- a/pipeline/workflow/ingestion-helper/routes/aggregation.py
+++ b/pipeline/workflow/ingestion-helper/routes/aggregation.py
@@ -13,70 +13,44 @@
 # limitations under the License.
 
 import logging
-from typing import Any, Dict, List, Optional
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel, Field
-
+from utils.aggregation import AggregationUtils
 import config
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
 from routes.models import BaseResponse, ResponseStatus
-from aggregation import AggregationOrchestrator
 from utils.logging import log_start
 
-
-
-class AggregationWorkflowState(BaseModel):
-    """Represents the execution state of a multi-stage aggregation pipeline run.
-
-    This state object is passed back and forth between the client (Google Cloud
-    Workflows) and the helper service endpoints to durably maintain the progress
-    of a stateless, sequential aggregation run across multiple stages.
-    """
-    status: str = Field(..., description="Overall status of the run: RUNNING, SUCCEEDED, FAILED")
-    current_stage: int = Field(..., description="The stage currently executing")
-    active_job_ids: List[str] = Field(default_factory=list, description="BQ job IDs running in the current stage")
-    import_list: List[Dict[str, Any]] = Field(default_factory=list, description="Original list of imports")
-    error: Optional[str] = Field(default=None, description="Detailed error message if failed")
-
-class InitiateRequest(BaseModel):
-    importList: List[Dict[str, Any]] = Field(default_factory=list)
-
-# TODO: Remove AggregationRequest once all consumers migrate to /initiate and /poll
 class AggregationRequest(BaseModel):
-    """Temporary request model for compatibility run endpoint."""
     importList: List[Dict[str, Any]] = Field(default_factory=list)
 
-
-# TODO: Remove AggregationStatusRequest once all consumers migrate to /initiate and /poll
 class AggregationStatusRequest(BaseModel):
-    """Temporary request model for compatibility status endpoint."""
     jobIds: List[str] = Field(default_factory=list)
 
-
-# TODO: Remove AggregationResponse once all consumers migrate to /initiate and /poll
 class AggregationResponse(BaseResponse):
-    """Temporary response model for compatibility run endpoint."""
     jobIds: List[str] = Field(default_factory=list, description="BigQuery job IDs submitted for async aggregation")
 
-
-# TODO: Remove AggregationStatusResponse once all consumers migrate to /initiate and /poll
 class AggregationStatusResponse(BaseResponse):
-    """Temporary response model for compatibility status endpoint."""
     error: Optional[str] = Field(default=None, description="Detailed error message if failed")
     failedJobs: Optional[List[str]] = Field(default_factory=list, description="List of failed BigQuery job IDs")
 
-
-
 router = APIRouter(prefix="/aggregation", tags=["aggregation"])
 
-
-def _get_orchestrator() -> AggregationOrchestrator:
-    """Helper to initialize the orchestrator using global config."""
+@router.post("/run", response_model=AggregationResponse)
+@log_start
+def run_aggregation(req: AggregationRequest):
+    """Runs aggregation logic asynchronously for the specified imports, returning BigQuery job IDs."""
+    if not req.importList:
+        logging.info("Empty import list. Skipping aggregation.")
+        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
+        
     if not all([config.SPANNER_CONNECTION_ID, config.SPANNER_PROJECT_ID, config.SPANNER_INSTANCE_ID, config.SPANNER_GRAPH_DATABASE_ID]):
         raise HTTPException(
             status_code=400,
             detail="Missing required configuration environment variables: SPANNER_CONNECTION_ID, SPANNER_PROJECT_ID, SPANNER_INSTANCE_ID, or SPANNER_GRAPH_DATABASE_ID"
         )
-    return AggregationOrchestrator(
+        
+    aggregation = AggregationUtils(
         connection_id=config.SPANNER_CONNECTION_ID,
         project_id=config.SPANNER_PROJECT_ID,
         instance_id=config.SPANNER_INSTANCE_ID,
@@ -84,158 +58,39 @@ def _get_orchestrator() -> AggregationOrchestrator:
         location=config.LOCATION,
         is_base_dc=config.IS_BASE_DC,
     )
-
-
-
-@router.post("/initiate", response_model=AggregationWorkflowState)
-@log_start
-def initiate_aggregation(req: InitiateRequest):
-    """Initiates the aggregation run by executing Stage 1 and returning the initial state."""
-    if not req.importList:
-        logging.info("Empty import list. Skipping aggregation.")
-        return AggregationWorkflowState(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=[])
-
     try:
-        orchestrator = _get_orchestrator()
-        import_names = [item.get('importName') for item in req.importList if item.get('importName')]
-        
-        active_stages = orchestrator.get_active_stages(import_names)
-        if not active_stages:
-            logging.info("No stages have active aggregations for the current imports. Completing immediately.")
-            return AggregationWorkflowState(status="SUCCEEDED", current_stage=0, active_job_ids=[], import_list=req.importList)
-
-        first_stage = active_stages[0]
-
-        logging.info(f"Initiating aggregation at Stage {first_stage}")
-        job_ids = orchestrator.execute_stage(first_stage, import_names)
-        
-        return AggregationWorkflowState(
-            status="RUNNING",
-            current_stage=first_stage,
-            active_job_ids=job_ids,
-            import_list=req.importList
-        )
-    except Exception as e:
-        logging.error(f"Failed to initiate aggregation: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to initiate aggregation: {str(e)}")
-
-
-@router.post("/poll", response_model=AggregationWorkflowState)
-@log_start
-def poll_aggregation(state: AggregationWorkflowState):
-    """Checks progress of active jobs and transitions to the next stage if complete."""
-    if state.status != "RUNNING":
-        return state # Already in a terminal state
-
-    try:
-        orchestrator = _get_orchestrator()
-        import_names = [item.get('importName') for item in state.import_list if item.get('importName')]
-
-        # 1. Check status of active jobs in BigQuery
-        if not state.active_job_ids:
-            bq_status = {"status": "DONE"}
-        else:
-            logging.info(f"Polling status for jobs in Stage {state.current_stage}: {state.active_job_ids}")
-            bq_status = orchestrator.check_jobs_status(state.active_job_ids)
-        
-        # Case A: Any job failed
-        if bq_status["status"] == "FAILED":
-            logging.error(f"Stage {state.current_stage} failed with error: {bq_status.get('error')}")
-            return AggregationWorkflowState(
-                status="FAILED",
-                current_stage=state.current_stage,
-                active_job_ids=[],
-                import_list=state.import_list,
-                error=bq_status.get("error")
-            )
-            
-        # Case B: Jobs are still executing (explicitly check for DONE to transition)
-        if bq_status["status"] != "DONE":
-            logging.info(f"Stage {state.current_stage} is still executing (status: {bq_status['status']}).")
-            return state # Return unchanged
-            
-        # Case C: All jobs succeeded -> Find and execute the next active stage
-        active_stages = orchestrator.get_active_stages(import_names)
-        next_stages = [s for s in active_stages if s > state.current_stage]
-        
-        if next_stages:
-            next_stage = next_stages[0]
-            logging.info(f"Stage {state.current_stage} completed. Transitioning to Stage {next_stage}...")
-            new_job_ids = orchestrator.execute_stage(next_stage, import_names)
-            return AggregationWorkflowState(
-                status="RUNNING",
-                current_stage=next_stage,
-                active_job_ids=new_job_ids,
-                import_list=state.import_list
-            )
-            
-        # If we exit the loop, there are no more active stages left
-        logging.info("All aggregation stages completed successfully!")
-        return AggregationWorkflowState(
-            status="SUCCEEDED",
-            current_stage=state.current_stage,
-            active_job_ids=[],
-            import_list=state.import_list
-        )
-            
-    except Exception as e:
-        logging.error(f"Error during polling: {e}")
-        return AggregationWorkflowState(
-            status="FAILED",
-            current_stage=state.current_stage,
-            active_job_ids=[],
-            import_list=state.import_list,
-            error=f"Orchestrator error: {str(e)}"
-        )
-
-# TODO: Remove the /run endpoint once all consumers migrate to /initiate and /poll
-@router.post("/run", response_model=AggregationResponse, deprecated=True)
-@log_start
-def run_aggregation(req: AggregationRequest):
-    """Temporary endpoint. Runs ALL enabled aggregations in parallel (ignores stages).
-
-    Please migrate to /initiate and /poll endpoints.
-    """
-    if not req.importList:
-        logging.info("Empty import list. Skipping temporary aggregation.")
-        return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=[])
-        
-    try:
-        orchestrator = _get_orchestrator()
-        import_names = [item.get('importName') for item in req.importList if item.get('importName')]
-        
-        # Compatibility Mode: Submit ALL enabled stages in parallel
-        job_ids = []
-        active_stages = orchestrator.get_active_stages(import_names)
-        for stage_num in active_stages:
-            job_ids.extend(orchestrator.execute_stage(stage_num, import_names))
-                
+        job_ids = aggregation.run_aggregation(req.importList)
         return AggregationResponse(status=ResponseStatus.SUBMITTED, jobIds=job_ids)
     except Exception as e:
-        logging.error(f"Temporary aggregation failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Temporary aggregation failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Aggregation failed: {str(e)}")
 
-
-# TODO: Remove the /status endpoint once all consumers migrate to /initiate and /poll
-@router.post("/status", response_model=AggregationStatusResponse, deprecated=True)
-@log_start
-def get_aggregation_status(req: AggregationStatusRequest):
-    """Temporary endpoint. Checks the status of the submitted BigQuery jobs.
-
-    Please migrate to /initiate and /poll endpoints.
-    """
+@router.post("/status", response_model=AggregationStatusResponse)
+def check_aggregation_status(req: AggregationStatusRequest):
+    """Checks the status of the submitted aggregation BigQuery jobs."""
     if not req.jobIds:
         logging.info("Empty jobIds. Returning status DONE.")
         return AggregationStatusResponse(status=ResponseStatus.DONE)
         
+    if not all([config.SPANNER_CONNECTION_ID, config.SPANNER_PROJECT_ID, config.SPANNER_INSTANCE_ID, config.SPANNER_GRAPH_DATABASE_ID]):
+        raise HTTPException(
+            status_code=400,
+            detail="Missing required configuration environment variables."
+        )
+        
+    aggregation = AggregationUtils(
+        connection_id=config.SPANNER_CONNECTION_ID,
+        project_id=config.SPANNER_PROJECT_ID,
+        instance_id=config.SPANNER_INSTANCE_ID,
+        database_id=config.SPANNER_GRAPH_DATABASE_ID,
+        location=config.LOCATION,
+        is_base_dc=config.IS_BASE_DC,
+    )
     try:
-        orchestrator = _get_orchestrator()
-        status_info = orchestrator.check_jobs_status(req.jobIds)
+        status_info = aggregation.check_aggregation_status(req.jobIds)
         return AggregationStatusResponse(
             status=ResponseStatus.from_str(status_info.get("status", "ERROR")),
             error=status_info.get("error"),
-            failedJobs=status_info.get("failed_jobs", [])
+            failedJobs=status_info.get("failedJobs", [])
         )
     except Exception as e:
-        logging.error(f"Temporary check status failed: {e}")
-        raise HTTPException(status_code=500, detail=f"Temporary check status failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Aggregation status check failed: {str(e)}")

From 086db67b638f10155d722e49d80ed04dfb9b97e5 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 20:42:13 +0530
Subject: [PATCH 27/33] feat(aggregation): add informative name property to all
 calculation steps across YAML configs

---
 .../aggregation/configs/common.yaml           |  9 ++-
 .../aggregation/configs/entity.yaml           | 26 ++++----
 .../aggregation/configs/place.yaml            | 57 +++++++++-------
 .../aggregation/configs/statvar.yaml          | 65 ++++++++++++-------
 .../configs/statvar_calculation.yaml          |  6 +-
 .../aggregation/configs/statvar_series.yaml   | 30 ++++++---
 .../aggregation/configs/super_enum.yaml       | 15 +++--
 .../aggregation/orchestrator.py               |  3 +-
 .../ingestion-helper/aggregation/schema.json  |  1 +
 9 files changed, 132 insertions(+), 80 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
index 59986da18..a066c3f0d 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
@@ -1,18 +1,21 @@
 calculations:
   # Generates linkedContainedInPlace, linkedMemberOf, etc.
-  - type: LINKED_EDGES
+  - name: "Global: ContainedInPlace & MemberOf Graph Linked Edges"
+    type: LINKED_EDGES
     input_imports:
       - "*"
     stage: 1
 
   # Generates summary statistics in the Cache table
-  - type: PROVENANCE_SUMMARY
+  - name: "Global: Cache Provenance & Lineage Summary"
+    type: PROVENANCE_SUMMARY
     input_imports:
       - "*"
     stage: 1
 
   # Generates the Statistical Variable hierarchy/verticals
-  - type: STAT_VAR_GROUPS
+  - name: "Global: StatVar Group Hierarchy & Verticals"
+    type: STAT_VAR_GROUPS
     input_imports:
       - "*"
     stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
index dcf30500d..4abb8737c 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
@@ -1,7 +1,8 @@
 calculations:
 
   # Earthquakes
-  - type: ENTITY_AGGREGATION
+  - name: "EarthquakeUSGS: Earthquake Event Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: EarthquakeUSGS_Agg
     input_imports:
       - EarthquakeUSGS
@@ -30,7 +31,8 @@ calculations:
         - "magnitude: [8 9 M]"
 
   # Fires
-  - type: ENTITY_AGGREGATION
+  - name: "FireFAMWEB: Wildland Fire Event Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: FireFAMWEB_Agg
     input_imports:
       - FireFAMWEB
@@ -43,7 +45,8 @@ calculations:
       agg_date_formats:
         - YYYY
 
-  - type: ENTITY_AGGREGATION
+  - name: "FireWFIGS: Fire Incident Event Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: FireWFIGS_Agg
     input_imports:
       - FireWFIGS
@@ -59,7 +62,8 @@ calculations:
         - YYYY-MM
         - YYYY
 
-  - type: ENTITY_AGGREGATION
+  - name: "NASA_VIIRSActiveFiresEvents: Fire Event S2 Cell Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: NASA_VIIRSActiveFiresEvents_Agg
     # Aggregate event counts for S2-cells of level 13
     input_imports:
@@ -75,7 +79,8 @@ calculations:
         - YYYY-MM
 
   # Storms
-  - type: ENTITY_AGGREGATION
+  - name: "StormNOAA: Storm & Extreme Weather Event Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: StormNOAA_Agg
     input_imports:
       - StormNOAA
@@ -86,11 +91,6 @@ calculations:
       agg_date_formats:
         - YYYY
         - YYYY-MM
-      # The following query is used to get all the types:
-      #
-      # SELECT DISTINCT object_id
-      # FROM `datcom-store.dc_kg_latest.Triple`
-      # WHERE prov_id='dc/svnv9g3' AND predicate = 'typeOf'
       entity_types:
         - MarineHighWindEvent
         - HighWindEvent
@@ -156,7 +156,8 @@ calculations:
         - StormEpisode
 
   # Floods
-  - type: ENTITY_AGGREGATION
+  - name: "DynamicWorld_FloodEvents: Flood Event S2 Cell Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: DynamicWorld_FloodEvents_Agg
     # Aggregate event counts for S2-cells of level 13
     input_imports:
@@ -172,7 +173,8 @@ calculations:
         - YYYY-MM
 
   # Heat/Cold Temperature Events counts
-  - type: ENTITY_AGGREGATION
+  - name: "TemperatureEvents: Heat & Cold Event Aggregation"
+    type: ENTITY_AGGREGATION
     output_import: TemperatureEvents_Agg
     input_imports:
       - TemperatureEvents
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
index cf90fb7bb..23943f141 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
@@ -1,5 +1,6 @@
 calculations:
-  - type: PLACE_AGGREGATION
+  - name: "CensusACS5YearSurvey: State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - CensusACS5YearSurvey
     place_aggregation:
@@ -8,7 +9,8 @@ calculations:
     stage: 1
     output_import: CensusACS5YearSurvey_AggCountry
 
-  - type: PLACE_AGGREGATION
+  - name: "CensusSAHIE: State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - CensusSAHIE
     place_aggregation:
@@ -17,7 +19,8 @@ calculations:
     stage: 1
     output_import: CensusSAHIE_AggCountry
 
-  - type: PLACE_AGGREGATION
+  - name: "CDCMortality: County -> State Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - CDCMortality
     place_aggregation:
@@ -26,7 +29,8 @@ calculations:
     stage: 1
     output_import: CDCMortality_AggState
 
-  - type: PLACE_AGGREGATION
+  - name: "CDCMortality: State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - CDCMortality_AggState
     place_aggregation:
@@ -35,16 +39,8 @@ calculations:
     stage: 2
     output_import: CDCMortality_AggState_AggCountry
 
-  # - type: PLACE_AGGREGATION
-  #   input_imports:
-  #     - FBIGovCrime
-  #   place_aggregation:
-  #     from_place_types: State
-  #     to_place_types: Country
-  #   stage: 1
-  #   output_import: FBIGovCrime_AggCountry
-
-  - type: PLACE_AGGREGATION
+  - name: "DEA_ARCOS: County -> State Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - DEA_ARCOS
     place_aggregation:
@@ -53,7 +49,8 @@ calculations:
     stage: 1
     output_import: DEA_ARCOS_AggState
 
-  - type: PLACE_AGGREGATION
+  - name: "DEA_ARCOS: State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - DEA_ARCOS_AggState
     place_aggregation:
@@ -62,7 +59,8 @@ calculations:
     stage: 2
     output_import: DEA_ARCOS_AggState_AggCountry
 
-  - type: PLACE_AGGREGATION
+  - name: "EPA_EJSCREEN: CensusBlockGroup -> CensusTract Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - EPA_EJSCREEN
     place_aggregation:
@@ -71,7 +69,8 @@ calculations:
     stage: 1
     output_import: EPA_EJSCREEN_AggCensusTract
 
-  - type: PLACE_AGGREGATION
+  - name: "EPA_EJSCREEN: CensusTract -> County Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - EPA_EJSCREEN_AggCensusTract
     place_aggregation:
@@ -80,7 +79,8 @@ calculations:
     stage: 2
     output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
 
-  - type: PLACE_AGGREGATION
+  - name: "DeepSolar: CensusBlockGroup -> CensusTract Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - DeepSolar
     place_aggregation:
@@ -89,7 +89,8 @@ calculations:
     stage: 1
     output_import: DeepSolar_AggCensusTract
 
-  - type: PLACE_AGGREGATION
+  - name: "DeepSolar: CensusTract -> County Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - DeepSolar_AggCensusTract
     place_aggregation:
@@ -98,7 +99,8 @@ calculations:
     stage: 2
     output_import: DeepSolar_AggCensusTract_AggCounty
 
-  - type: PLACE_AGGREGATION
+  - name: "EPA_GHGRP: EpaReportingFacility -> County Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - EPA_GHGRP
     place_aggregation:
@@ -107,7 +109,8 @@ calculations:
     stage: 1
     output_import: EPA_GHGRP_AggCounty
 
-  - type: PLACE_AGGREGATION
+  - name: "EPA_GHGRP: EpaReportingFacility -> CensusZipCodeTabulationArea Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - EPA_GHGRP
     place_aggregation:
@@ -116,7 +119,8 @@ calculations:
     stage: 1
     output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
 
-  - type: PLACE_AGGREGATION
+  - name: "EPA_GHGRP: County -> State Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - EPA_GHGRP_AggCounty
     place_aggregation:
@@ -125,7 +129,8 @@ calculations:
     stage: 2
     output_import: EPA_GHGRP_AggCounty_AggState
 
-  - type: PLACE_AGGREGATION
+  - name: "RFF_USGridGeo: GeoGrid -> County Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - RFF_USGridGeo_WeatherVariabilityForecast
     place_aggregation:
@@ -135,7 +140,8 @@ calculations:
     stage: 1
     output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
 
-  - type: PLACE_AGGREGATION
+  - name: "India_RBIStateDomesticProduct: State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - India_RBIStateDomesticProduct
     place_aggregation:
@@ -145,7 +151,8 @@ calculations:
     stage: 1
     output_import: India_RBIStateDomesticProduct_AggCountry
 
-  - type: PLACE_AGGREGATION
+  - name: "India_RBIStateDomesticProduct: StatVarAgg State -> Country Place Rollup"
+    type: PLACE_AGGREGATION
     input_imports:
       - India_RBIStateDomesticProduct_StatVarAgg
     place_aggregation:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
index c25e7fae2..9fd864a48 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -4,7 +4,8 @@ calculations:
 # Health Insurance Coverage
 # -------------------------
 #
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Health Insurance SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -271,7 +272,8 @@ calculations:
       # NOTE: Values SpeakEnglishLessThanVeryWell and SpeakEnglishVeryWell already
       # have population values from a different table.
       #
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Ability To Speak English SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_AbilityToSpeakEnglish_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -318,7 +320,8 @@ calculations:
       # aggregate to total and by gender.
       # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
       #
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: In Armed Forces SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_InArmedForces_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -378,7 +381,8 @@ calculations:
             - dc/2zj8jthd4f563
 
       # Education - CensusACS5YearSurvey
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Education SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_Education_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -593,8 +597,9 @@ calculations:
             - dc/dc9v9h3q8l8n7
 
       # Education - ACSED5YrSurvey
-  - type: STAT_VAR_AGGREGATION
-    output_import: ACSED5YrSurvey_StatVarAgg
+  - name: "ACSED5YrSurvey: Education SV Aggregation"
+    type: STAT_VAR_AGGREGATION
+    output_import: ACSED5YrSurvey_StatVarAgg
     input_imports:
       - ACSED5YrSurvey
     stat_var_aggregation:
@@ -621,7 +626,8 @@ calculations:
             - dc/bstxmnb4k1wrb
 
       # Demographics
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Demographics SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_Demographics_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -638,7 +644,8 @@ calculations:
             - Count_Person_0To4Years_Female
 
       # Agriculture
-  - type: STAT_VAR_AGGREGATION
+  - name: "USDA_AgricultureCensus: Agriculture Producer SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: USDA_AgricultureCensus_Agriculture_StatVarAgg
     input_imports:
       - USDA_AgricultureCensus
@@ -659,7 +666,8 @@ calculations:
       # Employment
       # ------------------------
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Employment & Industry SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_Employment_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -849,7 +857,8 @@ calculations:
             #   }
 
       # Crime
-  - type: STAT_VAR_AGGREGATION
+  - name: "USNationalPrisonerStatistics: Crime & Correctional Facility SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: USNationalPrisonerStatistics_Crime_StatVarAgg
     input_imports:
       - USNationalPrisonerStatistics
@@ -861,7 +870,8 @@ calculations:
             - dc/91vy0sf20wlg9
 
       # WithOwnChildrenUnder18.
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S1251: Children & Household SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S1251_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S1251
@@ -881,7 +891,8 @@ calculations:
             - Count_Person_WithOwnChildrenUnder18_Male
 
       # Marriage
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S1201: Marital Status SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S1201_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S1201
@@ -905,7 +916,8 @@ calculations:
             - Count_Person_InLaborForce_Male_Widowed
 
       # Employment by business ownership type.
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S2408: Business Ownership Employment SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S2408_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S2408
@@ -921,7 +933,8 @@ calculations:
             - Count_Person_PrivatelyOwnedForProfitEstablishment_Female_PaidWorker
 
       # US Citizen by Naturalization
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S0504: Naturalized Citizenship SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S0504_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0504
@@ -1051,7 +1064,8 @@ calculations:
             - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn
 
       # HousingUnit HomeValue.
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Home Value SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_HousingUnit_HomeValue_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -1101,7 +1115,8 @@ calculations:
             - Count_HousingUnit_HomeValue750000To999999USDollar
           skip_all_sources_present_check: true
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Person Age Groups SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_Person_Age_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -1116,7 +1131,8 @@ calculations:
             - Count_Person_62To64Years
           skip_all_sources_present_check: true
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey: Individual Income SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_Income_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
@@ -1307,7 +1323,8 @@ calculations:
             # Population: Male, 10,000 - 12,499 USD, White Alone Not Hispanic or Latino, Worked Full Time
             - dc/5jp07brw3g26h
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "IndiaNSS_HealthAilments: Health Ailments SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: IndiaNSS_HealthAilments_StatVarAgg
     input_imports:
       - IndiaNSS_HealthAilments
@@ -1618,7 +1635,8 @@ calculations:
             - Count_MedicalConditionIncident_70OrMoreYears_Male_ConditionSkinDisease_AsAFractionOf_Count_Person
           skip_all_sources_present_check: true
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "OECDRegionalDemography: Person Age Groups SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: OECDRegionalDemography_Person_Age_StatVarAgg
     input_imports:
       - OECDRegionalDemography_Population
@@ -1703,7 +1721,8 @@ calculations:
             - Count_Person_80OrMoreYears
           skip_all_sources_present_check: true
 
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S0801: Work Commute SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S0801_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0801
@@ -1819,7 +1838,8 @@ calculations:
             - Count_Person_Years16Onwards_35To44Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
             - Count_Person_Years16Onwards_45To59Minute_WorkCommute_Employed_Female_WorkedOutsideOfHome
             - Count_Person_Years16Onwards_60OrMoreMinute_WorkCommute_Employed_Female_WorkedOutsideOfHome
-  - type: STAT_VAR_AGGREGATION
+  - name: "CensusACS5YearSurvey_SubjectTables_S0804: Means of Transportation Work Commute SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: CensusACS5YearSurvey_SubjectTables_S0804_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0804
@@ -1858,7 +1878,8 @@ calculations:
             - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_35To44Minute_WorkCommute_Employed_WorkedOutsideOfHome
             - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_45To59Minute_WorkCommute_Employed_WorkedOutsideOfHome
             - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
-  - type: STAT_VAR_AGGREGATION
+  - name: "India_RBIStateDomesticProduct: Gross Value Added SV Aggregation"
+    type: STAT_VAR_AGGREGATION
     output_import: India_RBIStateDomesticProduct_StatVarAgg
     input_imports:
       - India_RBIStateDomesticProduct
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
index 12408e433..7602fb587 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
@@ -1,7 +1,8 @@
 calculations:
 
 # Energy.
-  - type: STAT_VAR_CALCULATION
+  - name: "EIA_Electricity: Annual Emissions Per Capita StatVar Calculation"
+    type: STAT_VAR_CALCULATION
     input_imports:
       - EPA_GHGRP_AggCounty
       - EPA_GHGRP_AggCensusZipCodeTabulationArea
@@ -39,7 +40,8 @@ calculations:
 # note that the output uses sv_prefix and measurement_method_prefix which is not
 # the case above, for example. Whenever sv and measurement_method are set in the
 # output, they are preferred. Otherwise, the *_prefix is used.
-  - type: STAT_VAR_CALCULATION
+  - name: "Climate: NASA CMIP6 Temperature Modeling Diffs Calculation"
+    type: STAT_VAR_CALCULATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
index c565076d9..f8ef1793c 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -1,5 +1,6 @@
 calculations:
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXDCP30: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXDCP30
     stage: 1
@@ -13,7 +14,8 @@ calculations:
                   - "1990"
                   - "2006"
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_Subnational: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Subnational
     stage: 1
@@ -29,7 +31,8 @@ calculations:
                   - "1990"
                   - "2006"
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_Country: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_Country
     stage: 1
@@ -43,7 +46,8 @@ calculations:
                   - "1990"
                   - "2006"
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_Subnational: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
     stage: 1
@@ -59,7 +63,8 @@ calculations:
                   - "1990"
                   - "2006"
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
     stage: 1
@@ -75,7 +80,8 @@ calculations:
                   - "1990"
                   - "2006"
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_Subnational: Stats Across Models Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
     stage: 2
@@ -89,7 +95,8 @@ calculations:
               - OPERATOR_PERCENTILE90
               - OPERATOR_PERCENTILE10
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Stats Across Models Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
     stage: 2
@@ -103,7 +110,8 @@ calculations:
               - OPERATOR_PERCENTILE90
               - OPERATOR_PERCENTILE10
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_Subnational: Over Time Stats Across Models Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
     stage: 3
@@ -206,7 +214,8 @@ calculations:
                 comparison: OPERATOR_LE
                 output_cprop: minTemperature
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Over Time Stats Across Models Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
     stage: 3
@@ -309,7 +318,8 @@ calculations:
                 comparison: OPERATOR_LE
                 output_cprop: minTemperature
 
-  - type: STAT_VAR_SERIES_AGGREGATION
+  - name: "NASA_NEXGDDP_IpccPlaces50: Measurement Methods & Base Date Diff Series Aggregation"
+    type: STAT_VAR_SERIES_AGGREGATION
     input_imports:
       - NASA_NEXGDDP_IpccPlaces50
     stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml
index 661d51dcf..cd62f1877 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/super_enum.yaml
@@ -1,25 +1,30 @@
 calculations:
-  - type: SUPER_ENUM_AGGREGATION
+  - name: "CensusACS5YearSurvey: Super Enum Mapping Aggregation"
+    type: SUPER_ENUM_AGGREGATION
     input_imports:
       - CensusACS5YearSurvey
     output_import: CensusACS5YearSurvey_SuperEnum
 
-  - type: SUPER_ENUM_AGGREGATION
+  - name: "CensusACS5YearSurvey_AggCountry: Super Enum Mapping Aggregation"
+    type: SUPER_ENUM_AGGREGATION
     input_imports:
       - CensusACS5YearSurvey_AggCountry
     output_import: CensusACS5YearSurvey_AggCountry_SuperEnum
 
-  - type: SUPER_ENUM_AGGREGATION
+  - name: "NCES_PrivateSchoolStats: Super Enum Mapping Aggregation"
+    type: SUPER_ENUM_AGGREGATION
     input_imports:
       - NCES_PrivateSchoolStats
     output_import: NCES_PrivateSchoolStats_SuperEnum
 
-  - type: SUPER_ENUM_AGGREGATION
+  - name: "NCES_PublicSchoolStats: Super Enum Mapping Aggregation"
+    type: SUPER_ENUM_AGGREGATION
     input_imports:
       - NCES_PublicSchoolStats
     output_import: NCES_PublicSchoolStats_SuperEnum
 
-  - type: SUPER_ENUM_AGGREGATION
+  - name: "NCES_SchoolDistrictStats: Super Enum Mapping Aggregation"
+    type: SUPER_ENUM_AGGREGATION
     input_imports:
       - NCES_SchoolDistrictStats
     output_import: NCES_SchoolDistrictStats_SuperEnum
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 33e64c7f3..52f9de8a8 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -159,7 +159,8 @@ def _dispatch_stage_steps(self, single_import: str, stage_num: int) -> List[Any]
                 continue
 
             step_type = calc["type"]
-            logging.info(f"Triggering '{step_type}' (Stage {stage_num}) for import '{single_import}'...")
+            calc_name = calc.get("name") or step_type
+            logging.info(f"Triggering step: '{calc_name}' (Stage {stage_num}) for import '{single_import}'...")
 
             step_jobs = []
             if step_type == "PLACE_AGGREGATION":
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index 8c0e85b8b..ab59d0f15 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -11,6 +11,7 @@
         "type": "object",
         "required": ["type", "input_imports"],
         "properties": {
+          "name": { "type": "string" },
           "type": {
             "type": "string",
             "enum": [

From fc4f2c44ffe605ee2863a0a6aa12274f42039d03 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 20:50:52 +0530
Subject: [PATCH 28/33] style(aggregation): reorder keys in YAML configs for
 consistent field structure

---
 .../aggregation/configs/common.yaml           |  6 +-
 .../aggregation/configs/entity.yaml           | 14 ++--
 .../aggregation/configs/place.yaml            | 64 +++++++++----------
 .../aggregation/configs/statvar.yaml          | 42 ++++++------
 .../aggregation/configs/statvar_series.yaml   | 20 +++---
 5 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
index a066c3f0d..27cc90dac 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/common.yaml
@@ -2,20 +2,20 @@ calculations:
   # Generates linkedContainedInPlace, linkedMemberOf, etc.
   - name: "Global: ContainedInPlace & MemberOf Graph Linked Edges"
     type: LINKED_EDGES
+    stage: 1
     input_imports:
       - "*"
-    stage: 1
 
   # Generates summary statistics in the Cache table
   - name: "Global: Cache Provenance & Lineage Summary"
     type: PROVENANCE_SUMMARY
+    stage: 1
     input_imports:
       - "*"
-    stage: 1
 
   # Generates the Statistical Variable hierarchy/verticals
   - name: "Global: StatVar Group Hierarchy & Verticals"
     type: STAT_VAR_GROUPS
+    stage: 1
     input_imports:
       - "*"
-    stage: 1
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
index 4abb8737c..0f970c8c6 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/entity.yaml
@@ -3,9 +3,9 @@ calculations:
   # Earthquakes
   - name: "EarthquakeUSGS: Earthquake Event Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: EarthquakeUSGS_Agg
     input_imports:
       - EarthquakeUSGS
+    output_import: EarthquakeUSGS_Agg
     entity_aggregation:
       entity_types:
         - EarthquakeEvent
@@ -33,9 +33,9 @@ calculations:
   # Fires
   - name: "FireFAMWEB: Wildland Fire Event Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: FireFAMWEB_Agg
     input_imports:
       - FireFAMWEB
+    output_import: FireFAMWEB_Agg
     entity_aggregation:
       entity_types:
         - WildlandFireEvent
@@ -47,9 +47,9 @@ calculations:
 
   - name: "FireWFIGS: Fire Incident Event Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: FireWFIGS_Agg
     input_imports:
       - FireWFIGS
+    output_import: FireWFIGS_Agg
     entity_aggregation:
       entity_types:
         - FireIncidentComplexEvent
@@ -64,10 +64,10 @@ calculations:
 
   - name: "NASA_VIIRSActiveFiresEvents: Fire Event S2 Cell Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: NASA_VIIRSActiveFiresEvents_Agg
     # Aggregate event counts for S2-cells of level 13
     input_imports:
       - NASA_VIIRSActiveFiresEvents
+    output_import: NASA_VIIRSActiveFiresEvents_Agg
     entity_aggregation:
       entity_types:
         - FireEvent
@@ -81,9 +81,9 @@ calculations:
   # Storms
   - name: "StormNOAA: Storm & Extreme Weather Event Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: StormNOAA_Agg
     input_imports:
       - StormNOAA
+    output_import: StormNOAA_Agg
     entity_aggregation:
       location_props:
         - affectedPlace
@@ -158,10 +158,10 @@ calculations:
   # Floods
   - name: "DynamicWorld_FloodEvents: Flood Event S2 Cell Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: DynamicWorld_FloodEvents_Agg
     # Aggregate event counts for S2-cells of level 13
     input_imports:
       - DynamicWorld_FloodEvents
+    output_import: DynamicWorld_FloodEvents_Agg
     entity_aggregation:
       entity_types:
         - FloodEvent
@@ -175,9 +175,9 @@ calculations:
   # Heat/Cold Temperature Events counts
   - name: "TemperatureEvents: Heat & Cold Event Aggregation"
     type: ENTITY_AGGREGATION
-    output_import: TemperatureEvents_Agg
     input_imports:
       - TemperatureEvents
+    output_import: TemperatureEvents_Agg
     entity_aggregation:
       entity_types:
         - HeatTemperatureEvent
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
index 23943f141..1072f43cd 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/place.yaml
@@ -1,163 +1,163 @@
 calculations:
   - name: "CensusACS5YearSurvey: State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - CensusACS5YearSurvey
+    output_import: CensusACS5YearSurvey_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    stage: 1
-    output_import: CensusACS5YearSurvey_AggCountry
 
   - name: "CensusSAHIE: State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - CensusSAHIE
+    output_import: CensusSAHIE_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    stage: 1
-    output_import: CensusSAHIE_AggCountry
 
   - name: "CDCMortality: County -> State Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - CDCMortality
+    output_import: CDCMortality_AggState
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    stage: 1
-    output_import: CDCMortality_AggState
 
   - name: "CDCMortality: State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 2
     input_imports:
       - CDCMortality_AggState
+    output_import: CDCMortality_AggState_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    stage: 2
-    output_import: CDCMortality_AggState_AggCountry
 
   - name: "DEA_ARCOS: County -> State Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - DEA_ARCOS
+    output_import: DEA_ARCOS_AggState
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    stage: 1
-    output_import: DEA_ARCOS_AggState
 
   - name: "DEA_ARCOS: State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 2
     input_imports:
       - DEA_ARCOS_AggState
+    output_import: DEA_ARCOS_AggState_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
-    stage: 2
-    output_import: DEA_ARCOS_AggState_AggCountry
 
   - name: "EPA_EJSCREEN: CensusBlockGroup -> CensusTract Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - EPA_EJSCREEN
+    output_import: EPA_EJSCREEN_AggCensusTract
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
-    stage: 1
-    output_import: EPA_EJSCREEN_AggCensusTract
 
   - name: "EPA_EJSCREEN: CensusTract -> County Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 2
     input_imports:
       - EPA_EJSCREEN_AggCensusTract
+    output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
-    stage: 2
-    output_import: EPA_EJSCREEN_AggCensusTract_AggCounty
 
   - name: "DeepSolar: CensusBlockGroup -> CensusTract Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - DeepSolar
+    output_import: DeepSolar_AggCensusTract
     place_aggregation:
       from_place_types: CensusBlockGroup
       to_place_types: CensusTract
-    stage: 1
-    output_import: DeepSolar_AggCensusTract
 
   - name: "DeepSolar: CensusTract -> County Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 2
     input_imports:
       - DeepSolar_AggCensusTract
+    output_import: DeepSolar_AggCensusTract_AggCounty
     place_aggregation:
       from_place_types: CensusTract
       to_place_types: County
-    stage: 2
-    output_import: DeepSolar_AggCensusTract_AggCounty
 
   - name: "EPA_GHGRP: EpaReportingFacility -> County Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - EPA_GHGRP
+    output_import: EPA_GHGRP_AggCounty
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: County
-    stage: 1
-    output_import: EPA_GHGRP_AggCounty
 
   - name: "EPA_GHGRP: EpaReportingFacility -> CensusZipCodeTabulationArea Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - EPA_GHGRP
+    output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
     place_aggregation:
       from_place_types: EpaReportingFacility
       to_place_types: CensusZipCodeTabulationArea
-    stage: 1
-    output_import: EPA_GHGRP_AggCensusZipCodeTabulationArea
 
   - name: "EPA_GHGRP: County -> State Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 2
     input_imports:
       - EPA_GHGRP_AggCounty
+    output_import: EPA_GHGRP_AggCounty_AggState
     place_aggregation:
       from_place_types: County
       to_place_types: State
-    stage: 2
-    output_import: EPA_GHGRP_AggCounty_AggState
 
   - name: "RFF_USGridGeo: GeoGrid -> County Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - RFF_USGridGeo_WeatherVariabilityForecast
+    output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
     place_aggregation:
       from_place_types: GeoGridPlace_0.25Deg
       to_place_types: County
       allow_multiple_to_places: true
-    stage: 1
-    output_import: RFF_USGridGeo_WeatherVariabilityForecast_AggCounty
 
   - name: "India_RBIStateDomesticProduct: State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - India_RBIStateDomesticProduct
+    output_import: India_RBIStateDomesticProduct_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
       allow_multiple_to_places: true
-    stage: 1
-    output_import: India_RBIStateDomesticProduct_AggCountry
 
   - name: "India_RBIStateDomesticProduct: StatVarAgg State -> Country Place Rollup"
     type: PLACE_AGGREGATION
+    stage: 1
     input_imports:
       - India_RBIStateDomesticProduct_StatVarAgg
+    output_import: India_RBIStateDomesticProduct_StatVarAgg_AggCountry
     place_aggregation:
       from_place_types: State
       to_place_types: Country
       allow_multiple_to_places: true
-    stage: 1
-    output_import: India_RBIStateDomesticProduct_StatVarAgg_AggCountry
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
index 9fd864a48..2ced6d221 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -6,10 +6,10 @@ calculations:
 #
   - name: "CensusACS5YearSurvey: Health Insurance SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # 1. Census Table B18135 includes insurance coverage by disability-status and
@@ -274,10 +274,10 @@ calculations:
       #
   - name: "CensusACS5YearSurvey: Ability To Speak English SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_AbilityToSpeakEnglish_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_AbilityToSpeakEnglish_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_SpeakEnglishNotAtAll
@@ -322,10 +322,10 @@ calculations:
       #
   - name: "CensusACS5YearSurvey: In Armed Forces SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_InArmedForces_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_InArmedForces_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # Women in armed forces
@@ -383,10 +383,10 @@ calculations:
       # Education - CensusACS5YearSurvey
   - name: "CensusACS5YearSurvey: Education SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_Education_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_Education_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # dc/g/Person_EducationalAttainment-5ThAnd6ThGrade
@@ -599,9 +599,9 @@ calculations:
       # Education - ACSED5YrSurvey
   - name: "CensusSAHIE_AggCountry: Health Insurance SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusSAHIE_AggCountry_StatVarAgg
     input_imports:
       - ACSED5YrSurvey
+    output_import: CensusSAHIE_AggCountry_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Parent_Occupation_Management_Business_Science_Arts
@@ -628,10 +628,10 @@ calculations:
       # Demographics
   - name: "CensusACS5YearSurvey: Demographics SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_Demographics_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_Demographics_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_85OrMoreYears
@@ -646,9 +646,9 @@ calculations:
       # Agriculture
   - name: "USDA_AgricultureCensus: Agriculture Producer SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: USDA_AgricultureCensus_Agriculture_StatVarAgg
     input_imports:
       - USDA_AgricultureCensus
+    output_import: USDA_AgricultureCensus_Agriculture_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_Producer
@@ -668,10 +668,10 @@ calculations:
 
   - name: "CensusACS5YearSurvey: Employment & Industry SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_Employment_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_Employment_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_16OrMoreYears_Civilian_Employed_InLaborForce_NAICSAgricultureForestryFishingHunting
@@ -859,9 +859,9 @@ calculations:
       # Crime
   - name: "USNationalPrisonerStatistics: Crime & Correctional Facility SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: USNationalPrisonerStatistics_Crime_StatVarAgg
     input_imports:
       - USNationalPrisonerStatistics
+    output_import: USNationalPrisonerStatistics_Crime_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_CorrectionalFacilityLocation_OutOfState
@@ -872,9 +872,9 @@ calculations:
       # WithOwnChildrenUnder18.
   - name: "CensusACS5YearSurvey_SubjectTables_S1251: Children & Household SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S1251_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S1251
+    output_import: CensusACS5YearSurvey_SubjectTables_S1251_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_WithOwnChildrenUnder18_Female
@@ -893,9 +893,9 @@ calculations:
       # Marriage
   - name: "CensusACS5YearSurvey_SubjectTables_S1201: Marital Status SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S1201_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S1201
+    output_import: CensusACS5YearSurvey_SubjectTables_S1201_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_InLaborForce_Divorced
@@ -918,9 +918,9 @@ calculations:
       # Employment by business ownership type.
   - name: "CensusACS5YearSurvey_SubjectTables_S2408: Business Ownership Employment SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S2408_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S2408
+    output_import: CensusACS5YearSurvey_SubjectTables_S2408_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_PrivatelyOwnedNotForProfitEstablishment_PaidWorker
@@ -935,9 +935,9 @@ calculations:
       # US Citizen by Naturalization
   - name: "CensusACS5YearSurvey_SubjectTables_S0504: Naturalized Citizenship SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S0504_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0504
+    output_import: CensusACS5YearSurvey_SubjectTables_S0504_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn
@@ -1066,10 +1066,10 @@ calculations:
       # HousingUnit HomeValue.
   - name: "CensusACS5YearSurvey: Home Value SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_HousingUnit_HomeValue_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_HousingUnit_HomeValue_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # Census ACS 5 year, Count_HousingUnit_HomeValue
@@ -1117,10 +1117,10 @@ calculations:
 
   - name: "CensusACS5YearSurvey: Person Age Groups SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_Person_Age_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_Person_Age_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # Census ACS 5 year, Age group
@@ -1133,10 +1133,10 @@ calculations:
 
   - name: "CensusACS5YearSurvey: Individual Income SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_Income_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey
       - CensusACS5YearSurvey_AggCountry
+    output_import: CensusACS5YearSurvey_Income_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # The following 9 aggregations are: High income (100,000 USD or More), by race.
@@ -1325,9 +1325,9 @@ calculations:
 
   - name: "DEA_ARCOS: Opioid Prescription SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: IndiaNSS_HealthAilments_StatVarAgg
     input_imports:
       - IndiaNSS_HealthAilments
+    output_import: IndiaNSS_HealthAilments_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_MedicalConditionIncident_ConditionBloodDisease_AsAFractionOf_Count_Person
@@ -1637,9 +1637,9 @@ calculations:
 
   - name: "OECDRegionalDemography: Person Age Groups SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: OECDRegionalDemography_Person_Age_StatVarAgg
     input_imports:
       - OECDRegionalDemography_Population
+    output_import: OECDRegionalDemography_Person_Age_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # OECD, Age group
@@ -1723,9 +1723,9 @@ calculations:
 
   - name: "CensusACS5YearSurvey_SubjectTables_S0801: Work Commute SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S0801_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0801
+    output_import: CensusACS5YearSurvey_SubjectTables_S0801_StatVarAgg
     stat_var_aggregation:
       aggregations:
       # Census ACS 5 year, Work Commute group
@@ -1840,9 +1840,9 @@ calculations:
             - Count_Person_Years16Onwards_60OrMoreMinute_WorkCommute_Employed_Female_WorkedOutsideOfHome
   - name: "CensusACS5YearSurvey_SubjectTables_S0804: Means of Transportation Work Commute SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: CensusACS5YearSurvey_SubjectTables_S0804_StatVarAgg
     input_imports:
       - CensusACS5YearSurvey_SubjectTables_S0804
+    output_import: CensusACS5YearSurvey_SubjectTables_S0804_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Count_Person_Years16Onwards_PublicTransportationExcludingTaxicab_WorkCommute_Employed_WorkedOutsideOfHome
@@ -1880,9 +1880,9 @@ calculations:
             - Count_Person_Years16Onwards_CarTruckOrVanCarpooled_60OrMoreMinute_WorkCommute_Employed_WorkedOutsideOfHome
   - name: "India_RBIStateDomesticProduct: Gross Value Added SV Aggregation"
     type: STAT_VAR_AGGREGATION
-    output_import: India_RBIStateDomesticProduct_StatVarAgg
     input_imports:
       - India_RBIStateDomesticProduct
+    output_import: India_RBIStateDomesticProduct_StatVarAgg
     stat_var_aggregation:
       aggregations:
         - ancestor_sv_id: Nominal_Amount_EconomicActivity_GrossValueAdded
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
index f8ef1793c..0d8b18280 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_series.yaml
@@ -1,9 +1,9 @@
 calculations:
   - name: "NASA_NEXDCP30: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXDCP30
-    stage: 1
     output_import: NASA_NEXDCP30_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -16,9 +16,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_Subnational: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXGDDP_Subnational
-    stage: 1
     output_import: NASA_NEXGDDP_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -33,9 +33,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_Country: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXGDDP_Country
-    stage: 1
     output_import: NASA_NEXGDDP_Country_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -48,9 +48,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_Subnational: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
-    stage: 1
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -65,9 +65,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
-    stage: 1
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:
@@ -82,9 +82,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_Subnational: Stats Across Models Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 2
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational
-    stage: 2
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -97,9 +97,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Stats Across Models Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 2
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50
-    stage: 2
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -112,9 +112,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_Subnational: Over Time Stats Across Models Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 3
     input_imports:
       - NASA_NEXGDDP_CMIP6_Subnational_AggrStatsAcrossModels
-    stage: 3
     output_import: NASA_NEXGDDP_CMIP6_Subnational_AggrOverTimeOnStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -216,9 +216,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_CMIP6_IpccPlaces50: Over Time Stats Across Models Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 3
     input_imports:
       - NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrStatsAcrossModels
-    stage: 3
     output_import: NASA_NEXGDDP_CMIP6_IpccPlaces50_AggrOverTimeOnStatsAcrossModels
     stat_var_series_aggregation:
       aggr_funcs:
@@ -320,9 +320,9 @@ calculations:
 
   - name: "NASA_NEXGDDP_IpccPlaces50: Measurement Methods & Base Date Diff Series Aggregation"
     type: STAT_VAR_SERIES_AGGREGATION
+    stage: 1
     input_imports:
       - NASA_NEXGDDP_IpccPlaces50
-    stage: 1
     output_import: NASA_NEXGDDP_IpccPlaces50_AggrDiffStats
     stat_var_series_aggregation:
       aggr_funcs:

From c8267c731d0a7578cdb5e5cec6617cd3e107d46f Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 20:54:18 +0530
Subject: [PATCH 29/33] style(aggregation): clean up comment indentation in
 statvar and statvar_calculation configs

---
 .../aggregation/configs/statvar.yaml          | 287 +++++++++---------
 .../configs/statvar_calculation.yaml          |  22 +-
 2 files changed, 153 insertions(+), 156 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
index 2ced6d221..f3992282a 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar.yaml
@@ -1,9 +1,6 @@
 calculations:
 
-#
-# Health Insurance Coverage
-# -------------------------
-#
+  # Health Insurance Coverage
   - name: "CensusACS5YearSurvey: Health Insurance SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -12,11 +9,11 @@ calculations:
     output_import: CensusACS5YearSurvey_HealthInsurance_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # 1. Census Table B18135 includes insurance coverage by disability-status and
-      #    age (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B18135).
-      #    We use that to compute total and by disability-status.
-      #
-      # Total uninsured
+        # 1. Census Table B18135 includes insurance coverage by disability-status and
+        #    age (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B18135).
+        #    We use that to compute total and by disability-status.
+        #
+        # Total uninsured
         - ancestor_sv_id: Count_Person_NoHealthInsurance
           source_sv_ids:
             # No Disability
@@ -27,7 +24,7 @@ calculations:
             - dc/bew8kj6l7tv93
             - dc/96dqj47csvmy8
             - dc/qr4s77egv27q2
-      # Total insured
+        # Total insured
         - ancestor_sv_id: Count_Person_WithHealthInsurance
           source_sv_ids:
             # No Disability
@@ -38,40 +35,40 @@ calculations:
             - dc/s5efzs4x817p5
             - dc/9j39148yn79zf
             - dc/p3v76jcvdx919
-      # Uninsured with no disability
-      # The following 4 aggregations come from parts of the above two aggregations.
+        # Uninsured with no disability
+        # The following 4 aggregations come from parts of the above two aggregations.
         - ancestor_sv_id: Count_Person_NoHealthInsurance_NoDisability
           source_sv_ids:
             # No Disability
             - dc/y0dvhk0sggzef
             - dc/kdg05h55y45y6
             - dc/9drszqwd2nef7
-      # Uninsured with disability
+        # Uninsured with disability
         - ancestor_sv_id: Count_Person_NoHealthInsurance_WithDisability
           source_sv_ids:
             # With Disability
             - dc/bew8kj6l7tv93
             - dc/96dqj47csvmy8
             - dc/qr4s77egv27q2
-      # Insured with no disability
+        # Insured with no disability
         - ancestor_sv_id: Count_Person_WithHealthInsurance_NoDisability
           source_sv_ids:
             # No Disability
             - dc/32mhsxvq7qsm4
             - dc/2s2dkbb7gz038
             - dc/dc8vqzkx18x0c
-      # Insured with disability
+        # Insured with disability
         - ancestor_sv_id: Count_Person_WithHealthInsurance_WithDisability
           source_sv_ids:
             # With Disability
             - dc/s5efzs4x817p5
             - dc/9j39148yn79zf
             - dc/p3v76jcvdx919
-      #
-      # 2. Census Table B27001 includes insurance coverage by age and gender
-      #    breakdown (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B27001). We
-      #    use that to compute gender breakdown.
-      #
+        #
+        # 2. Census Table B27001 includes insurance coverage by age and gender
+        #    breakdown (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B27001). We
+        #    use that to compute gender breakdown.
+        #
         - ancestor_sv_id: Count_Person_Female_NoHealthInsurance
           source_sv_ids:
             - dc/jx2q10tbnwhf3
@@ -116,8 +113,8 @@ calculations:
             - dc/wp3txgscfen9c
             - dc/b3n86k0h3h2w
             - dc/j8se7wv2gbdfd
-      # dc/g/Person_HealthInsurance-NoPrivateHealthInsurance
-      # Level 3
+        # dc/g/Person_HealthInsurance-NoPrivateHealthInsurance
+        # Level 3
         - ancestor_sv_id: Count_Person_NoPrivateHealthInsurance
           source_sv_ids:
             # Population: 26 - 34 Years, Female, No Private Health Insurance
@@ -168,8 +165,8 @@ calculations:
             - dc/wyfvejhn9fe7
             # Population: 6 - 17 Years, Male, No Private Health Insurance
             - dc/yfmzp444fj5r5
-      # dc/g/Person_HealthInsurance-NoPublicHealthInsurance
-      # Level 3
+        # dc/g/Person_HealthInsurance-NoPublicHealthInsurance
+        # Level 3
         - ancestor_sv_id: Count_Person_NoPublicHealthInsurance
           source_sv_ids:
             # Population: 26 - 34 Years, Female, No Public Health Insurance
@@ -220,8 +217,8 @@ calculations:
             - dc/vxecjxxbmhy43
             # Population: 25 - 34 Years, Female, No Public Health Insurance
             - dc/yjxmx3n02dx08
-      # dc/g/Person_HealthInsurance-WithOneTypeOfHealthInsurance
-      # Level 1
+        # dc/g/Person_HealthInsurance-WithOneTypeOfHealthInsurance
+        # Level 1
         - ancestor_sv_id: Count_Person_WithOneTypeOfHealthInsurance
           source_sv_ids:
             # Population: 18 Years or Less, With One Type of Health Insurance
@@ -236,8 +233,8 @@ calculations:
             - dc/thcbepzex9zd9
             # Population: 65 Years or More, With One Type of Health Insurance
             - dc/v1w52jrtvw6m2
-      # dc/g/Person_HealthInsurance-WithPublicCoverage
-      # Level 3
+        # dc/g/Person_HealthInsurance-WithPublicCoverage
+        # Level 3
         - ancestor_sv_id: Count_Person_WithPublicCoverage
           source_sv_ids:
             # Population: 18 Years or Less, With Disability, With Public Coverage
@@ -261,17 +258,17 @@ calculations:
             # Population: 19 - 64 Years, With Disability, With Public Coverage
             - dc/zkcy0f52ewh04
 
-      #
-      # Ability to speak English
-      # ------------------------
-      #
-      # We aggregate abilityToSpeakEnglish over nativity (2) and languageSpokenAtHome
-      # (4) for a total of 8 source SVs from Census Table B16005
-      #   (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B16005)
-      #
-      # NOTE: Values SpeakEnglishLessThanVeryWell and SpeakEnglishVeryWell already
-      # have population values from a different table.
-      #
+  #
+  # Ability to speak English
+  # ------------------------
+  #
+  # We aggregate abilityToSpeakEnglish over nativity (2) and languageSpokenAtHome
+  # (4) for a total of 8 source SVs from Census Table B16005
+  #   (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B16005)
+  #
+  # NOTE: Values SpeakEnglishLessThanVeryWell and SpeakEnglishVeryWell already
+  # have population values from a different table.
+  #
   - name: "CensusACS5YearSurvey: Ability To Speak English SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -311,15 +308,15 @@ calculations:
             - dc/yx0mdj3dnk3mb
             - dc/1x48nzrne2f88
 
-      #
-      # In Armed Forces
-      # ---------------
-      #
-      # Census Table B23001 includes count of people in armed forces (in which case
-      # In labor Forces is implied) broken down by gender and age. From that we
-      # aggregate to total and by gender.
-      # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
-      #
+  #
+  # In Armed Forces
+  # ---------------
+  #
+  # Census Table B23001 includes the count of people in the armed forces (which
+  # implies they are in the labor force), broken down by gender and age. From
+  # that we aggregate to the total and by gender.
+  # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
+  #
   - name: "CensusACS5YearSurvey: In Armed Forces SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -328,7 +325,7 @@ calculations:
     output_import: CensusACS5YearSurvey_InArmedForces_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # Women in armed forces
+        # Women in armed forces
         - ancestor_sv_id: Count_Person_Female_InArmedForces
           source_sv_ids:
             - dc/173smewzddlb
@@ -341,7 +338,7 @@ calculations:
             - dc/8d182v9j04mg4
             - dc/lb3lb4mg82mph
             - dc/g2m31qc7q1x64
-      # Men in armed forces
+        # Men in armed forces
         - ancestor_sv_id: Count_Person_Male_InArmedForces
           source_sv_ids:
             - dc/vp1gqv00d2ql3
@@ -354,7 +351,7 @@ calculations:
             - dc/x63tjfw28tzvc
             - dc/fpx513jvf4xed
             - dc/2zj8jthd4f563
-      # Armed forces population, from combining the above two sets of SVs.
+        # Armed forces population, from combining the above two sets of SVs.
         - ancestor_sv_id: Count_Person_InArmedForces
           source_sv_ids:
             # Female
@@ -380,7 +377,7 @@ calculations:
             - dc/fpx513jvf4xed
             - dc/2zj8jthd4f563
 
-      # Education - CensusACS5YearSurvey
+  # Education - CensusACS5YearSurvey
   - name: "CensusACS5YearSurvey: Education SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -389,24 +386,24 @@ calculations:
     output_import: CensusACS5YearSurvey_Education_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # dc/g/Person_EducationalAttainment-5ThAnd6ThGrade
-      # Level 1
+        # dc/g/Person_EducationalAttainment-5ThAnd6ThGrade
+        # Level 1
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_5ThAnd6ThGrade
           source_sv_ids:
             # Population: 5th And 6th Grade, Female
             - Count_Person_25OrMoreYears_EducationalAttainment5ThAnd6ThGrade_Female
             # Population: 5th And 6th Grade, Male
             - Count_Person_25OrMoreYears_EducationalAttainment5ThAnd6ThGrade_Male
-      # dc/g/Person_EducationalAttainment-7ThAnd8ThGrade
-      # Level 1
+        # dc/g/Person_EducationalAttainment-7ThAnd8ThGrade
+        # Level 1
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_7ThAnd8ThGrade
           source_sv_ids:
             # Population: 7th And 8th Grade, Female
             - Count_Person_25OrMoreYears_EducationalAttainment7ThAnd8ThGrade_Female
             # Population: 7th And 8th Grade, Male
             - Count_Person_25OrMoreYears_EducationalAttainment7ThAnd8ThGrade_Male
-      # dc/g/Person_EducationalAttainment-9ThTo12ThGradeNoDiploma
-      # Level 3
+        # dc/g/Person_EducationalAttainment-9ThTo12ThGradeNoDiploma
+        # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_9ThTo12ThGradeNoDiploma
           source_sv_ids:
             # Population: 18 - 24 Years, 9th To 12th Grade No Diploma, Female
@@ -429,8 +426,8 @@ calculations:
             - dc/5sps7rmylm73b
             # Population: 65 Years or More, 9th To 12th Grade No Diploma, Male
             - dc/07hctc6f9e2k9
-      # dc/g/Person_EducationalAttainment-LessThan9ThGrade
-      # Level 3
+        # dc/g/Person_EducationalAttainment-LessThan9ThGrade
+        # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_LessThan9ThGrade
           source_sv_ids:
             # Population: 18 - 24 Years, Less Than 9th Grade, Female
@@ -453,8 +450,8 @@ calculations:
             - dc/292723k92k5tb
             # Population: 65 Years or More, Less Than 9th Grade, Male
             - dc/68pblb53csteb
-      # dc/g/Person_EducationalAttainment-LessThanHighSchoolDiploma
-      # Level 3
+        # dc/g/Person_EducationalAttainment-LessThanHighSchoolDiploma
+        # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_LessThanHighSchoolDiploma
           source_sv_ids:
             # Population: Less Than High School Diploma, Male, Two or More Races
@@ -493,16 +490,16 @@ calculations:
             - dc/yfrrvevrmyr74
             # Population: Less Than High School Diploma, Female, Two or More Races
             - dc/29l3m1z7d3n7c
-      # dc/g/Person_EducationalAttainment-NurseryTo4ThGrade
-      # Level 1
+        # dc/g/Person_EducationalAttainment-NurseryTo4ThGrade
+        # Level 1
         - ancestor_sv_id: Count_Person_Years25Onwards_EducationalAttainment_NurseryTo4ThGrade
           source_sv_ids:
             # Population: Nursery To 4th Grade, Female
             - Count_Person_25OrMoreYears_EducationalAttainmentNurseryTo4ThGrade_Female
             # Population: Nursery To 4th Grade, Male
             - Count_Person_25OrMoreYears_EducationalAttainmentNurseryTo4ThGrade_Male
-      # dc/g/Person_EducationalAttainment-SomeCollegeNoDegree
-      # Level 3
+        # dc/g/Person_EducationalAttainment-SomeCollegeNoDegree
+        # Level 3
         - ancestor_sv_id: Count_Person_EducationalAttainment_SomeCollegeNoDegree
           source_sv_ids:
             # Population: 18 - 24 Years, Some College No Degree, Female
@@ -540,7 +537,7 @@ calculations:
             - Count_Person_EducationalAttainment11ThGrade
             - Count_Person_EducationalAttainment12ThGradeNoDiploma
 
-      # The following 9 aggregations are: SomeCollegeOrAssociatesDegree, by race.
+        # The following 9 aggregations are: SomeCollegeOrAssociatesDegree, by race.
         - ancestor_sv_id: Count_Person_25OrMoreYears_SomeCollegeOrAssociatesDegree_AmericanIndianOrAlaskaNativeAlone
           source_sv_ids:
             # Population: Some College or Associates Degree, Female, American Indian or Alaska Native Alone
@@ -596,7 +593,7 @@ calculations:
             # Population: Some College or Associates Degree, Male, White Alone Not Hispanic or Latino
             - dc/dc9v9h3q8l8n7
 
-      # Education - ACSED5YrSurvey
+  # Education - ACSED5YrSurvey
   - name: "CensusSAHIE_AggCountry: Health Insurance SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -625,7 +622,7 @@ calculations:
             # Count of Parent: 16 Years or More, Civilian, Public School, Employed, in Labor Force, Service Occupations
             - dc/bstxmnb4k1wrb
 
-      # Demographics
+  # Demographics
   - name: "CensusACS5YearSurvey: Demographics SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -643,7 +640,7 @@ calculations:
             - Count_Person_0To4Years_Male
             - Count_Person_0To4Years_Female
 
-      # Agriculture
+  # Agriculture
   - name: "USDA_AgricultureCensus: Agriculture Producer SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -662,9 +659,9 @@ calculations:
             - Count_Person_Producer_AsianAlone
           skip_all_sources_present_check: true
 
-      #
-      # Employment
-      # ------------------------
+  #
+  # Employment
+  # ------------------------
 
   - name: "CensusACS5YearSurvey: Employment & Industry SV Aggregation"
     type: STAT_VAR_AGGREGATION
@@ -795,68 +792,68 @@ calculations:
             # Population: 16 Years or More, Civilian, Employed, in Labor Force, Male, Public Administration (NAICS/92)
             - dc/589p0gc36qem5
 
-      # Census Table B23001 includes count of employed civilians broken down by gender
-      # and age. From that we aggregate over age to get count of employed civilians
-      # for each gender.
-      # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
-      #
-#   - type: STAT_VAR_AGGREGATION
+  # Census Table B23001 includes count of employed civilians broken down by gender
+  # and age. From that we aggregate over age to get count of employed civilians
+  # for each gender.
+  # (https://data.census.gov/cedsci/table?tid=ACSDT5Y2019.B23001)
+  #
+  #   - type: STAT_VAR_AGGREGATION
   #   type: STAT_VAR_AGGREGATION
-#     output_import: CensusACS5YearSurvey_Employment_StatVarAgg
-#     input_imports:
-#       - CensusACS5YearSurvey
-#       - CensusACS5YearSurvey_AggCountry
-#     stat_var_aggregation:
-#       aggregations:
-      # # Women Employed
-      #
-      # # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
-      # # don't have. Fix it before adding this to aggregation.
-      # # dc/kz49wc5n3lhpd
-      # # dc/qcpg8c533syd6
-      # # dc/zv26z833d7g3c
-#         - ancestor_sv_id: Count_Person_Female_Employed
-#           source_sv_ids:
-#             - dc/yt1fm72s1y7b7
-#             - dc/hj65vwnt5csr7
-#             - dc/0sd3x3bb4qet5
-#             - dc/j4mzcc63n5zq5
-#             - dc/56jhsezrzl049
-#             - dc/ggx918j9p5tnf
-#             - dc/8rw47nf3ngle7
-#             - dc/hplj99j7mbfsh
-#             - dc/ksmx7fwfkm8lb
-#             - dc/6706lc55kg5d
-#             - dc/qcpg8c533syd6
-#             - dc/zv26z833d7g3c
-#             - dc/kz49wc5n3lhpd
-            #   }
-            #
-            # # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
-            # # don't have. Fix it before adding this to aggregation.
-            # # dc/5cxs4br0jz02c
-            # # dc/s909dd4r22fw
-            # # dc/xepldf55yq6s5
-            #
-            # # Men Employed
-#         - ancestor_sv_id: Count_Person_Male_Employed
-#           source_sv_ids:
-#             - dc/2s6hps4z1qced
-#             - dc/xvtk9180lme1h
-#             - dc/twdr4c500yev5
-#             - dc/n0btf3nglhqqc
-#             - dc/7qwvkhr4tsyt8
-#             - dc/pkpm179bbh822
-#             - dc/gyrw68q6x77l9
-#             - dc/ly0fvmlf4mtf5
-#             - dc/m020zpvzch2gd
-#             - dc/sgm602ncby3bf
-#             - dc/xepldf55yq6s5
-#             - dc/s909dd4r22fw
-#             - dc/5cxs4br0jz02c
-            #   }
+  #     output_import: CensusACS5YearSurvey_Employment_StatVarAgg
+  #     input_imports:
+  #       - CensusACS5YearSurvey
+  #       - CensusACS5YearSurvey_AggCountry
+  #     stat_var_aggregation:
+  #       aggregations:
+  # # Women Employed
+  #
+  # # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
+  # # don't have it. Fix that before adding them to this aggregation.
+  # # dc/kz49wc5n3lhpd
+  # # dc/qcpg8c533syd6
+  # # dc/zv26z833d7g3c
+  #         - ancestor_sv_id: Count_Person_Female_Employed
+  #           source_sv_ids:
+  #             - dc/yt1fm72s1y7b7
+  #             - dc/hj65vwnt5csr7
+  #             - dc/0sd3x3bb4qet5
+  #             - dc/j4mzcc63n5zq5
+  #             - dc/56jhsezrzl049
+  #             - dc/ggx918j9p5tnf
+  #             - dc/8rw47nf3ngle7
+  #             - dc/hplj99j7mbfsh
+  #             - dc/ksmx7fwfkm8lb
+  #             - dc/6706lc55kg5d
+  #             - dc/qcpg8c533syd6
+  #             - dc/zv26z833d7g3c
+  #             - dc/kz49wc5n3lhpd
+  #   }
+  #
+  # # The ancestor has "armedForcesStatus: dcs:Civilian", but the following sources
+  # # don't have it. Fix that before adding them to this aggregation.
+  # # dc/5cxs4br0jz02c
+  # # dc/s909dd4r22fw
+  # # dc/xepldf55yq6s5
+  #
+  # # Men Employed
+  #         - ancestor_sv_id: Count_Person_Male_Employed
+  #           source_sv_ids:
+  #             - dc/2s6hps4z1qced
+  #             - dc/xvtk9180lme1h
+  #             - dc/twdr4c500yev5
+  #             - dc/n0btf3nglhqqc
+  #             - dc/7qwvkhr4tsyt8
+  #             - dc/pkpm179bbh822
+  #             - dc/gyrw68q6x77l9
+  #             - dc/ly0fvmlf4mtf5
+  #             - dc/m020zpvzch2gd
+  #             - dc/sgm602ncby3bf
+  #             - dc/xepldf55yq6s5
+  #             - dc/s909dd4r22fw
+  #             - dc/5cxs4br0jz02c
+  #   }
 
-      # Crime
+  # Crime
   - name: "USNationalPrisonerStatistics: Crime & Correctional Facility SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -869,7 +866,7 @@ calculations:
             - dc/qgv9d3frn35qc
             - dc/91vy0sf20wlg9
 
-      # WithOwnChildrenUnder18.
+  # WithOwnChildrenUnder18.
   - name: "CensusACS5YearSurvey_SubjectTables_S1251: Children & Household SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -890,7 +887,7 @@ calculations:
             - Count_Person_WithOwnChildrenUnder18_Female
             - Count_Person_WithOwnChildrenUnder18_Male
 
-      # Marriage
+  # Marriage
   - name: "CensusACS5YearSurvey_SubjectTables_S1201: Marital Status SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -915,7 +912,7 @@ calculations:
             - Count_Person_InLaborForce_Female_Widowed
             - Count_Person_InLaborForce_Male_Widowed
 
-      # Employment by business ownership type.
+  # Employment by business ownership type.
   - name: "CensusACS5YearSurvey_SubjectTables_S2408: Business Ownership Employment SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -932,7 +929,7 @@ calculations:
             - Count_Person_PrivatelyOwnedForProfitEstablishment_Male_PaidWorker
             - Count_Person_PrivatelyOwnedForProfitEstablishment_Female_PaidWorker
 
-      # US Citizen by Naturalization
+  # US Citizen by Naturalization
   - name: "CensusACS5YearSurvey_SubjectTables_S0504: Naturalized Citizenship SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -1063,7 +1060,7 @@ calculations:
             - Count_Person_USCitizenByNaturalization_DateOfEntry2000OrLater_ForeignBorn
             - Count_Person_USCitizenByNaturalization_DateOfEntry2010OrLater_ForeignBorn
 
-      # HousingUnit HomeValue.
+  # HousingUnit HomeValue.
   - name: "CensusACS5YearSurvey: Home Value SV Aggregation"
     type: STAT_VAR_AGGREGATION
     input_imports:
@@ -1072,7 +1069,7 @@ calculations:
     output_import: CensusACS5YearSurvey_HousingUnit_HomeValue_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # Census ACS 5 year, Count_HousingUnit_HomeValue
+        # Census ACS 5 year, Count_HousingUnit_HomeValue
         - ancestor_sv_id: Count_HousingUnit_HomeValueUpto49999USDollar
           source_sv_ids:
             - Count_HousingUnit_HomeValueUpto10000USDollar
@@ -1123,7 +1120,7 @@ calculations:
     output_import: CensusACS5YearSurvey_Person_Age_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # Census ACS 5 year, Age group
+        # Census ACS 5 year, Age group
         - ancestor_sv_id: Count_Person_55To64Years
           source_sv_ids:
             - Count_Person_55To59Years
@@ -1139,7 +1136,7 @@ calculations:
     output_import: CensusACS5YearSurvey_Income_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # The following 9 aggregations are: High income (100,000 USD or More), by race.
+        # The following 9 aggregations are: High income (100,000 USD or More), by race.
         - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar100000Onwards_WithIncome_AmericanIndianOrAlaskaNativeAlone
           source_sv_ids:
             # Population: Female, 100,000 USD or More, American Indian or Alaska Native Alone, Not Worked Full Time
@@ -1231,7 +1228,7 @@ calculations:
             # Population: Male, 100,000 USD or More, White Alone Not Hispanic or Latino, Worked Full Time
             - dc/ekh1g39v9sgj4
 
-      # The following 9 aggregations are: Low income (10,000 - 12,499 USD), by race.
+        # The following 9 aggregations are: Low income (10,000 - 12,499 USD), by race.
         - ancestor_sv_id: Count_Person_16OrMoreYears_USDollar10000To12499_WithIncome_AmericanIndianOrAlaskaNativeAlone
           source_sv_ids:
             # Population: Female, 10,000 - 12,499 USD, American Indian or Alaska Native Alone, Not Worked Full Time
@@ -1642,7 +1639,7 @@ calculations:
     output_import: OECDRegionalDemography_Person_Age_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # OECD, Age group
+        # OECD, Age group
         - ancestor_sv_id: Count_Person_Upto9Years
           source_sv_ids:
             - Count_Person_Upto4Years
@@ -1728,7 +1725,7 @@ calculations:
     output_import: CensusACS5YearSurvey_SubjectTables_S0801_StatVarAgg
     stat_var_aggregation:
       aggregations:
-      # Census ACS 5 year, Work Commute group
+        # Census ACS 5 year, Work Commute group
         - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed
           source_sv_ids:
             - Count_Person_Years16Onwards_CarTruckOrVan_WorkCommute_Employed
@@ -1774,7 +1771,7 @@ calculations:
             - Count_Person_Years16Onwards_WorkedAtHome_WorkCommute_Employed_Female
             - Count_Person_Years16Onwards_WorkedFromHome_WorkCommute_Employed_Female
           skip_all_sources_present_check: true
-      # Worked outside of Home
+        # Worked outside of Home
         - ancestor_sv_id: Count_Person_Years16Onwards_WorkCommute_Employed_WorkedOutsideOfHome
           source_sv_ids:
             - Count_Person_Years16Onwards_StartTimeHour0000To0459_WorkCommute_Employed_WorkedOutsideOfHome
diff --git a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
index 7602fb587..4969e1a98 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
+++ b/pipeline/workflow/ingestion-helper/aggregation/configs/statvar_calculation.yaml
@@ -1,6 +1,6 @@
 calculations:
 
-# Energy.
+  # Energy.
   - name: "EIA_Electricity: Annual Emissions Per Capita StatVar Calculation"
     type: STAT_VAR_CALCULATION
     input_imports:
@@ -30,16 +30,16 @@ calculations:
               unit: MetricTonCO2ePerGigawattHour
               observation_period: P1Y
 
-# Climate: Temperature modeling diffs from actual.
-# There are 30+ models. For each model, the output diffs are done for
-# Aggregations: Mean, Min, Max and also for SSP2 with RCP4.5 and SSP5 with
-# RCP8.5 (for each model).
-# This includes diffs for P1M, P1Y, P5Y and P10Y.
-# Note that the computations below use sv_regex and measurement_method_regex
-# instead of sv and measurement_method to allow more expressive matching. Also
-# note that the output uses sv_prefix and measurement_method_prefix which is not
-# the case above, for example. Whenever sv and measurement_method are set in the
-# output, they are preferred. Otherwise, the *_prefix is used.
+  # Climate: Temperature modeling diffs from actual.
+  # There are 30+ models. For each model, the output diffs are done for
+  # Aggregations: Mean, Min, Max and also for SSP2 with RCP4.5 and SSP5 with
+  # RCP8.5 (for each model).
+  # This includes diffs for P1M, P1Y, P5Y and P10Y.
+  # Note that the computations below use sv_regex and measurement_method_regex
+  # instead of sv and measurement_method to allow more expressive matching. Also
+  # note that the output uses sv_prefix and measurement_method_prefix which is not
+  # the case above, for example. Whenever sv and measurement_method are set in the
+  # output, they are preferred. Otherwise, the *_prefix is used.
   - name: "Climate: NASA CMIP6 Temperature Modeling Diffs Calculation"
     type: STAT_VAR_CALCULATION
     input_imports:

From 645966dab0e6e3b8e8f5c4b1271eb491f0b12e12 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Thu, 2 Jul 2026 21:25:03 +0530
Subject: [PATCH 30/33] fix(aggregation): resolve review comments for config
 directory scanning and schema validation

---
 .../ingestion-helper/aggregation/orchestrator.py      | 11 ++++++++++-
 .../ingestion-helper/aggregation/orchestrator_test.py | 11 +++++++++++
 .../workflow/ingestion-helper/aggregation/schema.json |  4 +++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 52f9de8a8..a7ddb79a1 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -14,6 +14,7 @@
 
 """Aggregation orchestrator for Data Commons ingestion workflow."""
 
+import glob
 import logging
 import os
 import time
@@ -70,7 +71,15 @@ def __init__(
         schema_file_path = os.path.join(curr_dir, "schema.json")
 
         # Load and validate configuration
-        self.calculations = validate_config(target_config, schema_file_path)
+        self.calculations = []
+        if os.path.isdir(target_config):
+            yaml_files = sorted(
+                glob.glob(os.path.join(target_config, "*.yaml")) + glob.glob(os.path.join(target_config, "*.yml"))
+            )
+            for file_path in yaml_files:
+                self.calculations.extend(validate_config(file_path, schema_file_path))
+        else:
+            self.calculations = validate_config(target_config, schema_file_path)
 
     def run(self, active_imports: List[str]) -> None:
         """Executes aggregations independently for each active import.
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index c724248d2..3646b8cc2 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -80,6 +80,17 @@ def test_get_active_stages_for_import(self, mock_executor):
         stages = self.orchestrator.get_active_stages_for_import("OtherImport")
         self.assertEqual(stages, [])
 
+    def test_directory_config_loading(self, mock_executor):
+        """Tests that orchestrator correctly scans and loads config files from a directory."""
+        dir_orchestrator = AggregationOrchestrator(
+            connection_id="conn",
+            project_id="proj",
+            instance_id="inst",
+            database_id="db",
+            config_dir=self.tmpdir.name
+        )
+        self.assertEqual(len(dir_orchestrator.calculations), 2)
+
 
 @patch('aggregation.orchestrator.BigQueryExecutor')
 @patch('aggregation.orchestrator.PlaceAggregationGenerator')
diff --git a/pipeline/workflow/ingestion-helper/aggregation/schema.json b/pipeline/workflow/ingestion-helper/aggregation/schema.json
index ab59d0f15..a19047eb6 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/schema.json
+++ b/pipeline/workflow/ingestion-helper/aggregation/schema.json
@@ -10,6 +10,7 @@
       "items": {
         "type": "object",
         "required": ["type", "input_imports"],
+        "additionalProperties": false,
         "properties": {
           "name": { "type": "string" },
           "type": {
@@ -37,7 +38,8 @@
           "stat_var_aggregation": { "type": "object" },
           "entity_aggregation": { "type": "object" },
           "stat_var_series_aggregation": { "type": "object" },
-          "stat_var_calculation": { "type": "object" }
+          "stat_var_calculation": { "type": "object" },
+          "disabled": { "type": "boolean" }
         },
         "allOf": [
           {

From 507ade4e8c15256c7eceed028704445b87663969 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Fri, 3 Jul 2026 10:57:54 +0530
Subject: [PATCH 31/33] fix(aggregation): connect StatVarCalculationGenerator
 and clean up legacy fallback getters

---
 .../aggregation/orchestrator.py               | 31 ++++++++++++-------
 .../aggregation/orchestrator_test.py          |  3 +-
 2 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index a7ddb79a1..9ac6c5a31 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -25,6 +25,7 @@
 from .place_aggregation_generator import PlaceAggregationGenerator
 from .provenance_summary_generator import ProvenanceSummaryGenerator
 from .stat_var_aggregator import StatVarAggregator
+from .stat_var_calculation_generator import StatVarCalculationGenerator
 from .stat_var_group_generator import StatVarGroupGenerator
 from .validator import validate_config
 
@@ -176,6 +177,8 @@ def _dispatch_stage_steps(self, single_import: str, stage_num: int) -> List[Any]
                 step_jobs = self._trigger_place(calc, [single_import])
             elif step_type == "STAT_VAR_AGGREGATION":
                 step_jobs = self._trigger_stat_var(calc, [single_import])
+            elif step_type == "STAT_VAR_CALCULATION":
+                step_jobs = self._trigger_stat_var_calculation(calc, [single_import])
             elif step_type == "LINKED_EDGES":
                 step_jobs = self._trigger_linked_edges(calc, [single_import])
             elif step_type == "PROVENANCE_SUMMARY":
@@ -221,8 +224,8 @@ def _wait_for_jobs(self, job_ids: List[str], poll_interval: int = 5) -> None:
     def _trigger_place(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
         """Triggers place-level rollup aggregations."""
         place_cfg = config.get("place_aggregation", {})
-        from_type = place_cfg.get("from_place_types") or config.get("source_type")
-        to_type = place_cfg.get("to_place_types") or config.get("destination_type")
+        from_type = place_cfg["from_place_types"]
+        to_type = place_cfg["to_place_types"]
 
         logging.info(f"  -> Place Rollup: {from_type} -> {to_type} for imports {applicable_imports}")
         generator = PlaceAggregationGenerator(self.executor, self.is_base_dc)
@@ -238,15 +241,7 @@ def _trigger_stat_var(self, config: Dict[str, Any], applicable_imports: List[str
         """Triggers statistical variable aggregations."""
         stat_cfg = config.get("stat_var_aggregation", {})
         aggregations = stat_cfg.get("aggregations", [])
-        output_import_name = config.get("output_import") or config.get("output_import_name")
-
-        # Backwards compatibility fallback for single item config
-        if not aggregations and "ancestor_sv_id" in config:
-            aggregations = [{
-                "ancestor_sv_id": config["ancestor_sv_id"],
-                "source_sv_ids": config["source_sv_ids"],
-                "skip_all_sources_present_check": config.get("skip_all_sources_present_check", False)
-            }]
+        output_import_name = config.get("output_import")
 
         generator = StatVarAggregator(self.executor, self.is_base_dc)
         jobs = []
@@ -268,6 +263,20 @@ def _trigger_stat_var(self, config: Dict[str, Any], applicable_imports: List[str
 
         return jobs
 
+    def _trigger_stat_var_calculation(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
+        """Triggers statistical variable calculations."""
+        calc_cfg = config.get("stat_var_calculation", {})
+        calculations = calc_cfg.get("calculations", [])
+        output_import_name = config.get("output_import")
+
+        logging.info(f"  -> Stat Var Calculation for imports {applicable_imports}")
+        generator = StatVarCalculationGenerator(self.executor, self.is_base_dc)
+        return generator.calculate_stat_vars(
+            calculations=calculations,
+            import_names=applicable_imports,
+            output_import_name=output_import_name
+        )
+
     def _trigger_linked_edges(self, config: Dict[str, Any], applicable_imports: List[str]) -> List[Any]:
         """Triggers linked edge aggregations."""
         logging.info(f"  -> Linked Edges Aggregation for imports {applicable_imports}")
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 3646b8cc2..fa431e74f 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -95,6 +95,7 @@ def test_directory_config_loading(self, mock_executor):
 @patch('aggregation.orchestrator.BigQueryExecutor')
 @patch('aggregation.orchestrator.PlaceAggregationGenerator')
 @patch('aggregation.orchestrator.StatVarAggregator')
+@patch('aggregation.orchestrator.StatVarCalculationGenerator')
 class TestOrchestratorExecution(unittest.TestCase):
     """Tests stage execution, verifying job submission and synchronization."""
 
@@ -115,7 +116,7 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_run_synchronized_pipeline(self, mock_sv_agg, mock_place_gen, mock_executor_cls):
+    def test_run_synchronized_pipeline(self, mock_calc_gen, mock_sv_agg, mock_place_gen, mock_executor_cls):
         """Tests complete synchronized run pipeline for an import across stages."""
         mock_job1 = MagicMock()
         mock_job1.job_id = "job-place-1"

From 3b1baf7f3ee1d4d52285a1036a7e71554f1e6036 Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Fri, 3 Jul 2026 11:04:15 +0530
Subject: [PATCH 32/33] refactor(aggregation): make
 get_active_stages_for_import private and test via public API

---
 .../workflow/ingestion-helper/aggregation/orchestrator.py   | 6 +++---
 .../ingestion-helper/aggregation/orchestrator_test.py       | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 9ac6c5a31..10f36979a 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -95,7 +95,7 @@ def run(self, active_imports: List[str]) -> None:
 
         for single_import in active_imports:
             logging.info(f"=== Starting Aggregation Pipeline for Import: '{single_import}' ===")
-            active_stages = self.get_active_stages_for_import(single_import)
+            active_stages = self._get_active_stages_for_import(single_import)
 
             if not active_stages:
                 logging.info(f"No aggregation steps configured for import '{single_import}'. Skipping.")
@@ -107,7 +107,7 @@ def run(self, active_imports: List[str]) -> None:
 
             logging.info(f"=== Successfully completed all aggregation stages for Import: '{single_import}' ===")
 
-    def get_active_stages_for_import(self, single_import: str) -> List[int]:
+    def _get_active_stages_for_import(self, single_import: str) -> List[int]:
         """Returns a sorted list of unique active stage numbers for a single import.
 
         Args:
@@ -126,7 +126,7 @@ def get_active_stages(self, active_imports: List[str]) -> List[int]:
         """Returns a sorted list of unique active stage numbers across active imports."""
         stages = set()
         for single_import in active_imports:
-            stages.update(self.get_active_stages_for_import(single_import))
+            stages.update(self._get_active_stages_for_import(single_import))
         return sorted(list(stages))
 
     def execute_stage(self, stage_num: int, active_imports: List[str]) -> List[str]:
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index fa431e74f..6acded88f 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -72,12 +72,12 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_get_active_stages_for_import(self, mock_executor):
+    def test_get_active_stages(self, mock_executor):
         """Tests getting active stages for matching and non-matching imports."""
-        stages = self.orchestrator.get_active_stages_for_import("USFed_Census")
+        stages = self.orchestrator.get_active_stages(["USFed_Census"])
         self.assertEqual(stages, [1, 2])
 
-        stages = self.orchestrator.get_active_stages_for_import("OtherImport")
+        stages = self.orchestrator.get_active_stages(["OtherImport"])
         self.assertEqual(stages, [])
 
     def test_directory_config_loading(self, mock_executor):

From de358cfaabfccaad0ecf6a94e21b13a825eea72d Mon Sep 17 00:00:00 2001
From: Sandeep Tuniki <sandeep.tnk29@gmail.com>
Date: Fri, 3 Jul 2026 13:16:31 +0530
Subject: [PATCH 33/33] feat(aggregation): integrate AggregationOrchestrator
 into Cloud Run Job with dry_run support

---
 .../workflow/aggregation-helper/Dockerfile    | 22 ++++++-
 .../aggregation-helper/cloudbuild.yaml        |  1 +
 pipeline/workflow/aggregation-helper/main.py  | 60 +++++++++++++++----
 pipeline/workflow/build-services.yaml         |  2 +-
 .../aggregation/bq_executor.py                | 10 +++-
 .../aggregation/orchestrator.py               | 13 +++-
 .../aggregation/orchestrator_test.py          | 28 ++++++++-
 7 files changed, 114 insertions(+), 22 deletions(-)

diff --git a/pipeline/workflow/aggregation-helper/Dockerfile b/pipeline/workflow/aggregation-helper/Dockerfile
index b782018c8..8934dd82a 100644
--- a/pipeline/workflow/aggregation-helper/Dockerfile
+++ b/pipeline/workflow/aggregation-helper/Dockerfile
@@ -14,13 +14,29 @@
 
 FROM python:3.12-slim
 
+# Copy uv binary
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+
 # Allow statements and log messages to immediately appear in the logs
 ENV PYTHONUNBUFFERED True
 
 WORKDIR /app
 
-# Copy local code to the container image
-COPY main.py .
+# Copy dependency definition files to leverage Docker layer caching
+COPY ingestion-helper/pyproject.toml ingestion-helper/uv.lock ingestion-helper/__init__.py ./
+
+# Install production dependencies (without the project itself)
+RUN uv sync --no-dev --no-install-project
+
+# Copy ingestion-helper code (includes aggregation package) and main.py
+COPY ingestion-helper/ .
+COPY aggregation-helper/main.py .
+
+# Install the project itself
+RUN uv sync --no-dev
+
+# Place the virtual environment's bin directory on the PATH
+ENV PATH="/app/.venv/bin:$PATH"
 
-# Run the script
+# Run the Cloud Run Job script
 ENTRYPOINT ["python", "main.py"]
diff --git a/pipeline/workflow/aggregation-helper/cloudbuild.yaml b/pipeline/workflow/aggregation-helper/cloudbuild.yaml
index b3fc0d318..4c3f78cf1 100644
--- a/pipeline/workflow/aggregation-helper/cloudbuild.yaml
+++ b/pipeline/workflow/aggregation-helper/cloudbuild.yaml
@@ -17,6 +17,7 @@ steps:
   - name: 'gcr.io/cloud-builders/docker'
     args: [
       'build',
+      '-f', 'aggregation-helper/Dockerfile',
       '-t', '${_AR_REPO_URL}/${_IMAGE_NAME}:${_TAG}',
       '-t', '${_AR_REPO_URL}/${_IMAGE_NAME}:${_VERSION}',
       '.'
diff --git a/pipeline/workflow/aggregation-helper/main.py b/pipeline/workflow/aggregation-helper/main.py
index e74a7f996..9438774da 100644
--- a/pipeline/workflow/aggregation-helper/main.py
+++ b/pipeline/workflow/aggregation-helper/main.py
@@ -12,20 +12,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Aggregation Helper Cloud Run Job skeleton."""
+"""Aggregation Helper Cloud Run Job execution entry point."""
 
 import argparse
 import json
 import logging
+import os
 import sys
 
+from aggregation import AggregationOrchestrator
+
+
 def main():
     logging.basicConfig(level=logging.INFO)
-    logging.info("Starting Aggregation Helper Job")
+    logging.info("Starting Aggregation Helper Cloud Run Job...")
 
     parser = argparse.ArgumentParser(description="Run aggregation helper job.")
-    parser.add_argument("--import_list", help="JSON string representing the list of imports to process.")
-    
+    parser.add_argument(
+        "--import_list",
+        help="JSON string representing the list of imports to process."
+    )
+    parser.add_argument(
+        "--dry_run",
+        action="store_true",
+        default=True,
+        help="Run in dry-run mode without executing jobs (default: True)."
+    )
+    parser.add_argument(
+        "--execute",
+        action="store_false",
+        dest="dry_run",
+        help="Disable dry-run mode and execute BigQuery aggregation jobs."
+    )
+
     args = parser.parse_args()
 
     if not args.import_list:
@@ -34,17 +53,36 @@ def main():
 
     try:
         import_list = json.loads(args.import_list)
-        logging.info(f"Received import list: {import_list}")
+        logging.info(f"Received active imports to process: {import_list}")
     except json.JSONDecodeError as e:
         logging.error(f"Failed to parse import_list JSON: {e}")
         sys.exit(1)
 
-    # Dummy logic
-    logging.info("Processing aggregation (dummy)...")
-    for imp in import_list:
-        logging.info(f"Processing import: {imp}")
-    
-    logging.info("Aggregation Helper Job completed successfully.")
+    connection_id = os.environ.get("BQ_SPANNER_CONN_ID")
+    project_id = os.environ.get("PROJECT_ID")
+    instance_id = os.environ.get("SPANNER_INSTANCE_ID")
+    database_id = os.environ.get("SPANNER_DATABASE_ID") or os.environ.get("SPANNER_GRAPH_DATABASE_ID")
+    location = os.environ.get("LOCATION")
+
+    if not all([connection_id, project_id, instance_id, database_id]):
+        logging.error(
+            f"Missing required environment variables. connection_id={connection_id}, "
+            f"project_id={project_id}, instance_id={instance_id}, database_id={database_id}"
+        )
+        sys.exit(1)
+
+    orchestrator = AggregationOrchestrator(
+        connection_id=connection_id,
+        project_id=project_id,
+        instance_id=instance_id,
+        database_id=database_id,
+        location=location,
+    )
+
+    logging.info(f"Executing AggregationOrchestrator pipeline (dry_run={args.dry_run}) for imports: {import_list}")
+    orchestrator.run(active_imports=import_list, dry_run=args.dry_run)
+    logging.info("Aggregation Helper Cloud Run Job completed successfully.")
+
 
 if __name__ == "__main__":
     main()
diff --git a/pipeline/workflow/build-services.yaml b/pipeline/workflow/build-services.yaml
index dde23e970..f6bf5f530 100644
--- a/pipeline/workflow/build-services.yaml
+++ b/pipeline/workflow/build-services.yaml
@@ -36,7 +36,7 @@ steps:
 
 - id: 'build-aggregation-helper'
   name: 'gcr.io/cloud-builders/gcloud'
-  args: ['builds', 'submit', 'aggregation-helper', '--config', 'aggregation-helper/cloudbuild.yaml', '--substitutions', '_AR_REPO_URL=${_AR_REPO_URL},_VERSION=${_VERSION}']
+  args: ['builds', 'submit', '.', '--config', 'aggregation-helper/cloudbuild.yaml', '--substitutions', '_AR_REPO_URL=${_AR_REPO_URL},_VERSION=${_VERSION}']
   dir: 'pipeline/workflow'
   waitFor: ['-']
 
diff --git a/pipeline/workflow/ingestion-helper/aggregation/bq_executor.py b/pipeline/workflow/ingestion-helper/aggregation/bq_executor.py
index 58f4f992e..010e8606b 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/bq_executor.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/bq_executor.py
@@ -37,8 +37,14 @@ def __init__(self,
         self.location = location
         # TODO: Remove run_sequential logic once DCP migrates to async execution.
         self.run_sequential = run_sequential
-        self.client = bigquery.Client(project=self.project_id,
-                                      location=self.location)
+        self._client: Optional[bigquery.Client] = None
+
+    @property
+    def client(self) -> bigquery.Client:
+        """Returns the BigQuery client, creating it on first access and reusing the cached instance afterwards."""
+        if self._client is None:
+            self._client = bigquery.Client(project=self.project_id, location=self.location)
+        return self._client
 
     def get_spanner_destination_uri(self) -> str:
         """Returns the Spanner destination URI for EXPORT DATA."""
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
index 10f36979a..bf42d2d53 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator.py
@@ -82,7 +82,7 @@ def __init__(
         else:
             self.calculations = validate_config(target_config, schema_file_path)
 
-    def run(self, active_imports: List[str]) -> None:
+    def run(self, active_imports: List[str], dry_run: bool = True) -> None:
         """Executes aggregations independently for each active import.
 
         Blocks and synchronizes stage progression for each import:
@@ -90,8 +90,11 @@ def run(self, active_imports: List[str]) -> None:
 
         Args:
             active_imports: List of active import dataset names to process.
+            dry_run: If True, logs imports and active stages without executing BigQuery jobs.
         """
-        logging.info(f"Starting Aggregation Orchestrator run for active imports: {active_imports}")
+        logging.info(
+            f"Starting Aggregation Orchestrator run (dry_run={dry_run}) for active imports: {active_imports}"
+        )
 
         for single_import in active_imports:
             logging.info(f"=== Starting Aggregation Pipeline for Import: '{single_import}' ===")
@@ -101,6 +104,12 @@ def run(self, active_imports: List[str]) -> None:
                 logging.info(f"No aggregation steps configured for import '{single_import}'. Skipping.")
                 continue
 
+            if dry_run:
+                logging.info(
+                    f"Detected active stage(s) {active_stages} for import '{single_import}'. Skipping execution because dry_run=True."
+                )
+                continue
+
             for stage_num in active_stages:
                 logging.info(f"--- Triggering Stage {stage_num} for import '{single_import}' ---")
                 self._execute_and_synchronize_stage(single_import, stage_num)
diff --git a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
index 6acded88f..95ed3e9e6 100644
--- a/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
+++ b/pipeline/workflow/ingestion-helper/aggregation/orchestrator_test.py
@@ -116,8 +116,14 @@ def setUp(self):
     def tearDown(self):
         self.tmpdir.cleanup()
 
-    def test_run_synchronized_pipeline(self, mock_calc_gen, mock_sv_agg, mock_place_gen, mock_executor_cls):
-        """Tests complete synchronized run pipeline for an import across stages."""
+    def test_run_dry_run_true(self, mock_calc_gen, mock_sv_agg, mock_place_gen, mock_executor_cls):
+        """Tests that run() with dry_run=True never calls aggregate_places or aggregate_stat_vars."""
+        self.orchestrator.run(active_imports=["USFed_Census"], dry_run=True)
+        mock_place_gen.return_value.aggregate_places.assert_not_called()
+        mock_sv_agg.return_value.aggregate_stat_vars.assert_not_called()
+
+    def test_run_dry_run_false(self, mock_calc_gen, mock_sv_agg, mock_place_gen, mock_executor_cls):
+        """Tests that run with dry_run=False submits BigQuery jobs across stages."""
         mock_job1 = MagicMock()
         mock_job1.job_id = "job-place-1"
         mock_place_gen.return_value.aggregate_places.return_value = mock_job1
@@ -129,7 +135,7 @@ def test_run_synchronized_pipeline(self, mock_calc_gen, mock_sv_agg, mock_place_
         self.orchestrator.executor = MagicMock()
         self.orchestrator.executor.get_jobs_status.return_value = {"status": "DONE"}
 
-        self.orchestrator.run(active_imports=["USFed_Census"])
+        self.orchestrator.run(active_imports=["USFed_Census"], dry_run=False)
 
         mock_place_gen.return_value.aggregate_places.assert_called_once_with(
             import_names=["USFed_Census"],
@@ -146,6 +152,22 @@ def test_run_synchronized_pipeline(self, mock_calc_gen, mock_sv_agg, mock_place_
             skip_all_sources_present_check=True
         )
 
+    def test_execute_stage(self, mock_calc_gen, mock_sv_agg, mock_place_gen, mock_executor_cls):
+        """Tests that execute_stage(1, ...) calls aggregate_places once and returns the resulting job IDs."""
+        mock_job1 = MagicMock()
+        mock_job1.job_id = "job-place-1"
+        mock_place_gen.return_value.aggregate_places.return_value = mock_job1
+
+        job_ids = self.orchestrator.execute_stage(1, ["USFed_Census"])
+
+        mock_place_gen.return_value.aggregate_places.assert_called_once_with(
+            import_names=["USFed_Census"],
+            source_type="County",
+            destination_type="State",
+            allow_multiple_to_places=False
+        )
+        self.assertEqual(job_ids, ["job-place-1"])
+
 
 if __name__ == '__main__':
     unittest.main()