From 0b8e7f6660b83439d53bd8dbcde68cf33bb140dc Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 19 May 2026 17:02:22 -0500 Subject: [PATCH 1/2] fix: Load any stores.. secret file; drop Py<3.10 dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes: 1. settings.py — Config._load_secrets() Was hardcoded to only auto-load stores..access_key and stores..secret_key from the secrets directory. That whitelist made sense when only the built-in s3 / gcs / azure stores existed, but it's wrong for plugin-registered adapters that define their own secret fields. Concrete example: a plugin that wraps Databricks Unity Catalog Volumes authenticates via a Bearer token. Its store spec wants a `token` field. Today the file .secrets/stores.uc.token is silently ignored, forcing users to either rename to a misleading "access_key" or to set the value programmatically at notebook startup (defeating the auto-load story). This change drops the whitelist. Any stores.. file under the secrets directory is loaded into dj.config["stores"][][]. The existing "don't override pre-configured values" precedence is preserved — config file and env vars still win. New test: TestStoreSecrets.test_load_store_arbitrary_attr confirms that `token` and `api_key` fields load via the same path as access_key/secret_key. 2. storage_adapter.py — _discover_adapters() Removed the try/except wrapping importlib.metadata.entry_points(). The fallback path was for Python < 3.10 (where entry_points() returned a SelectableGroups dict instead of accepting the group= kwarg), but DataJoint's minimum supported Python is 3.10 (per requires-python in pyproject.toml). The dead branch was also producing a mypy type error (SelectableGroups.get signature). --- src/datajoint/settings.py | 25 +++++++++++++------------ src/datajoint/storage_adapter.py | 13 +++---------- tests/unit/test_settings.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/datajoint/settings.py b/src/datajoint/settings.py index 1be1ecba2..bad488e2b 100644 --- a/src/datajoint/settings.py +++ b/src/datajoint/settings.py @@ -691,26 +691,27 @@ def _load_secrets(self, secrets_dir: Path) -> None: self.database.password = db_password logger.debug(f"Loaded database.password from {secrets_dir}") - # Load per-store secrets (stores..access_key, stores..secret_key) - # Iterate through all files in secrets directory + # Load per-store secrets from any stores.. file. + # The attr name is recorded as-is on stores.; this lets + # plugin-registered adapters define their own secret fields + # (e.g. a Bearer ``token`` for HTTP-based protocols) without + # forcing AWS-style ``access_key`` / ``secret_key`` naming. if secrets_dir.is_dir(): for secret_file in secrets_dir.iterdir(): if not secret_file.is_file() or secret_file.name.startswith("."): continue parts = secret_file.name.split(".") - # Check for stores..access_key or stores..secret_key pattern if len(parts) == 3 and parts[0] == "stores": store_name, attr = parts[1], parts[2] - if attr in ("access_key", "secret_key"): - value = secret_file.read_text().strip() - # Initialize store dict if needed - if store_name not in self.stores: - self.stores[store_name] = {} - # Only set if not already present - if attr not in self.stores[store_name]: - self.stores[store_name][attr] = value - logger.debug(f"Loaded stores.{store_name}.{attr} from {secrets_dir}") + value = secret_file.read_text().strip() + # Initialize store dict if needed + if store_name not in self.stores: + self.stores[store_name] = {} + # Only set if not already present (config / env vars win) + if attr not in self.stores[store_name]: + self.stores[store_name][attr] = value + logger.debug(f"Loaded stores.{store_name}.{attr} from {secrets_dir}") @contextmanager def override(self, **kwargs: Any) -> Iterator["Config"]: diff --git a/src/datajoint/storage_adapter.py b/src/datajoint/storage_adapter.py index b304586b2..0cb93031b 100644 --- a/src/datajoint/storage_adapter.py +++ b/src/datajoint/storage_adapter.py @@ -86,16 +86,9 @@ def get_storage_adapter(protocol: str) -> StorageAdapter | None: def _discover_adapters() -> None: """Load storage adapters from datajoint.storage entry points.""" - try: - from importlib.metadata import entry_points - except ImportError: - logger.debug("importlib.metadata not available, skipping adapter discovery") - return - - try: - eps = entry_points(group="datajoint.storage") - except TypeError: - eps = entry_points().get("datajoint.storage", []) + from importlib.metadata import entry_points + + eps = entry_points(group="datajoint.storage") for ep in eps: if ep.name in _adapter_registry: diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index 83bb1d67c..22b27dc63 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -487,6 +487,24 @@ def test_secrets_do_not_override_existing(self, tmp_path): finally: cfg.stores = original_stores + def test_load_store_arbitrary_attr(self, tmp_path): + """Plugin-registered adapters can use arbitrary secret-field names.""" + # e.g. an HTTP-based protocol that authenticates with a Bearer token + secrets_dir = tmp_path / SECRETS_DIRNAME + secrets_dir.mkdir() + (secrets_dir / "stores.bearer_store.token").write_text("dapibdfXXXX") + (secrets_dir / "stores.bearer_store.api_key").write_text("ak_yyy") + + cfg = settings.Config() + original_stores = cfg.stores.copy() + try: + cfg._load_secrets(secrets_dir) + + assert cfg.stores["bearer_store"]["token"] == "dapibdfXXXX" + assert cfg.stores["bearer_store"]["api_key"] == "ak_yyy" + finally: + cfg.stores = original_stores + class TestDisplaySettings: """Test display-related settings.""" From 0b1aca6f50c643f72bca67b7149061787e19c49a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 20 May 2026 11:18:20 -0500 Subject: [PATCH 2/2] =?UTF-8?q?feat(config):=20DJ=5FSTORES=20env=20var=20+?= =?UTF-8?q?=20DJ=5FIGNORE=5FCONFIG=5FFILE=20flag=20=E2=80=94=20new=20in=20?= =?UTF-8?q?2.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds env-var configuration for object stores so the DataJoint platform — and any env-var-only deployment — can configure plugin-registered storage adapters (Databricks Unity Catalog Volumes, custom HTTP stores, lab archive systems) without files on disk. - DJ_STORES (JSON-encoded) carries the entire `stores` dict in the same shape used in `datajoint.json`. Replaces the file's `stores` block when set. - DJ_IGNORE_CONFIG_FILE (default false) skips `datajoint.json`, the project `.secrets/`, and `/run/secrets/datajoint/` entirely. Hard guarantee that no file on disk leaks into config. Implementation notes: - New `Config.ignore_config_file` field (validation_alias DJ_IGNORE_CONFIG_FILE) auto-bound by pydantic-settings. - The `stores` field receives a `validation_alias` placeholder so pydantic-settings does NOT auto-bind DJ_STORES at Config() construction. Otherwise its built-in JSON parser intercepts before precedence logic runs and reports SettingsError instead of a clean ValueError. - New `Config._apply_stores_env()` parses DJ_STORES JSON, replaces self.stores wholesale, raises ValueError on bad JSON or non-object payloads. - `_create_config()` restructured to: skip file + secrets when ignore_config_file is set; apply DJ_STORES between file load and secrets fill so env wins over file and secrets only fill gaps. Functional precedence (high to low): programmatic > DJ_STORES > config file > `.secrets/stores..` (fills missing attrs only). Tests: new TestStoreEnv class with 8 tests; existing TestStoreSecrets and test_storage_adapter tests still pass. Companion docs: datajoint/datajoint-docs#172 --- src/datajoint/settings.py | 84 +++++++++++++++++++++++------- tests/unit/test_settings.py | 101 ++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 19 deletions(-) diff --git a/src/datajoint/settings.py b/src/datajoint/settings.py index bad488e2b..73b9a820a 100644 --- a/src/datajoint/settings.py +++ b/src/datajoint/settings.py @@ -335,17 +335,32 @@ class Config(BaseSettings): jobs: JobsSettings = Field(default_factory=JobsSettings) # Unified stores configuration (replaces external and object_storage) + # ``validation_alias`` redirects pydantic-settings' env source away from the + # natural ``DJ_STORES`` so it doesn't auto-parse on Config() construction. + # ``DJ_STORES`` is handled by ``_apply_stores_env`` after the config file + # load so env-var precedence is honored. *New in 2.3.* stores: dict[str, Any] = Field( default_factory=dict, + validation_alias="_DJ_STORES_PYDANTIC_DISABLED", description="Unified object storage configuration. " "Use stores.default to designate default store. " - "Configure named stores as stores..protocol, stores..location, etc.", + "Configure named stores as stores..protocol, stores..location, etc. " + "Set via DJ_STORES (JSON object) or in datajoint.json. *New in 2.3* for " + "DJ_STORES env-var support.", ) # Top-level settings loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="INFO", validation_alias="DJ_LOG_LEVEL") safemode: bool = True + ignore_config_file: bool = Field( + default=False, + validation_alias="DJ_IGNORE_CONFIG_FILE", + description="If True, skip loading datajoint.json and the secrets directory. " + "Intended for env-var-only deployments (e.g. the DataJoint platform). " + "*New in 2.3.*", + ) + # Cache path for query results query_cache: Path | None = None @@ -713,6 +728,29 @@ def _load_secrets(self, secrets_dir: Path) -> None: self.stores[store_name][attr] = value logger.debug(f"Loaded stores.{store_name}.{attr} from {secrets_dir}") + def _apply_stores_env(self) -> None: + """Replace ``self.stores`` from the ``DJ_STORES`` env var if set. + + ``DJ_STORES`` holds a JSON object in the same shape as the ``stores`` + block of ``datajoint.json``. This lets env-var-only deployments + configure plugin-registered storage adapters with arbitrary attr + names (e.g. a Bearer ``token`` field) without negotiating an env-var + naming scheme per attr. + + *New in 2.3.* + """ + raw = os.environ.get("DJ_STORES") + if not raw: + return + try: + data = json.loads(raw) + except json.JSONDecodeError as e: + raise ValueError(f"DJ_STORES contains invalid JSON: {e}") from e + if not isinstance(data, dict): + raise ValueError(f"DJ_STORES must be a JSON object, got {type(data).__name__}") + self.stores = data + logger.debug("Loaded stores from DJ_STORES env var") + @contextmanager def override(self, **kwargs: Any) -> Iterator["Config"]: """ @@ -786,9 +824,13 @@ def save_template( Credentials should NOT be stored in datajoint.json. Instead, use either: - - Environment variables (``DJ_USER``, ``DJ_PASS``, ``DJ_HOST``, etc.) + - Environment variables (``DJ_USER``, ``DJ_PASS``, ``DJ_HOST``, + ``DJ_STORES`` for JSON-encoded store configs, etc.) - The ``.secrets/`` directory (created alongside datajoint.json) + Set ``DJ_IGNORE_CONFIG_FILE=true`` to skip both ``datajoint.json`` and + the secrets directory entirely (env-var-only configuration). + Parameters ---------- path : str or Path, optional @@ -963,25 +1005,29 @@ def _create_config() -> Config: """Create and initialize the global config instance.""" cfg = Config() - # Find config file (recursive parent search) - config_path = find_config_file() + config_path: Path | None = None + if not cfg.ignore_config_file: + config_path = find_config_file() + if config_path is not None: + try: + cfg.load(config_path) + except Exception as e: + warnings.warn(f"Failed to load config from {config_path}: {e}") + else: + warnings.warn( + f"No {CONFIG_FILENAME} found. Using defaults and environment variables. " + f"Run `dj.config.save_template()` to create a template configuration.", + stacklevel=2, + ) - if config_path is not None: - try: - cfg.load(config_path) - except Exception as e: - warnings.warn(f"Failed to load config from {config_path}: {e}") - else: - warnings.warn( - f"No {CONFIG_FILENAME} found. Using defaults and environment variables. " - f"Run `dj.config.save_template()` to create a template configuration.", - stacklevel=2, - ) + # DJ_STORES (if set) overrides the stores dict from the config file + cfg._apply_stores_env() - # Find and load secrets - secrets_dir = find_secrets_dir(config_path) - if secrets_dir is not None: - cfg._load_secrets(secrets_dir) + # Secrets fill missing attrs in whatever ended up in self.stores + if not cfg.ignore_config_file: + secrets_dir = find_secrets_dir(config_path) + if secrets_dir is not None: + cfg._load_secrets(secrets_dir) # Set initial log level logger.setLevel(cfg.loglevel) diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index 22b27dc63..0aeed2c67 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -506,6 +506,107 @@ def test_load_store_arbitrary_attr(self, tmp_path): cfg.stores = original_stores +class TestStoreEnv: + """Test DJ_STORES env var and DJ_IGNORE_CONFIG_FILE flag.""" + + def _isolate_filesystem(self, monkeypatch, tmp_path): + """chdir into a tmp_path with a .git sentinel so find_config_file stops there.""" + (tmp_path / ".git").mkdir() + monkeypatch.chdir(tmp_path) + # Defend against a /run/secrets/datajoint/ on the host + monkeypatch.setattr(settings, "SYSTEM_SECRETS_DIR", tmp_path / "nonexistent-system-secrets") + + def test_dj_stores_sets_stores_dict(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + monkeypatch.setenv( + "DJ_STORES", + '{"uc":{"protocol":"http","token":"dapibd","workspace_url":"https://x"}}', + ) + + with pytest.warns(UserWarning): # "No datajoint.json found" + cfg = settings._create_config() + + assert cfg.stores["uc"]["protocol"] == "http" + assert cfg.stores["uc"]["token"] == "dapibd" + assert cfg.stores["uc"]["workspace_url"] == "https://x" + + def test_dj_stores_overrides_config_file(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + (tmp_path / CONFIG_FILENAME).write_text('{"stores": {"main": {"protocol": "s3", "location": "from-file"}}}') + monkeypatch.setenv( + "DJ_STORES", + '{"main": {"protocol": "http", "location": "from-env"}}', + ) + + cfg = settings._create_config() + + assert cfg.stores["main"]["protocol"] == "http" + assert cfg.stores["main"]["location"] == "from-env" + + def test_dj_stores_invalid_json_raises(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + monkeypatch.setenv("DJ_STORES", "{not json") + with pytest.raises(ValueError, match="DJ_STORES.*invalid JSON"): + settings._create_config() + + def test_dj_stores_non_object_raises(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + monkeypatch.setenv("DJ_STORES", '["a", "b"]') + with pytest.raises(ValueError, match="DJ_STORES must be a JSON object"): + settings._create_config() + + def test_dj_stores_plus_secrets_dir(self, monkeypatch, tmp_path): + """Secrets dir fills attrs that DJ_STORES omits.""" + self._isolate_filesystem(monkeypatch, tmp_path) + # config file lets find_secrets_dir locate .secrets/ next to it + (tmp_path / CONFIG_FILENAME).write_text("{}") + secrets_dir = tmp_path / SECRETS_DIRNAME + secrets_dir.mkdir() + (secrets_dir / "stores.uc.token").write_text("from-secrets") + monkeypatch.setenv("DJ_STORES", '{"uc": {"protocol": "http"}}') + + cfg = settings._create_config() + + assert cfg.stores["uc"]["protocol"] == "http" + assert cfg.stores["uc"]["token"] == "from-secrets" + + def test_ignore_config_file_skips_json(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + (tmp_path / CONFIG_FILENAME).write_text('{"database": {"host": "should-not-load"}}') + monkeypatch.setenv("DJ_IGNORE_CONFIG_FILE", "true") + + cfg = settings._create_config() + + assert cfg.database.host == "localhost" + + def test_ignore_config_file_skips_secrets(self, monkeypatch, tmp_path): + self._isolate_filesystem(monkeypatch, tmp_path) + # Place secrets where find_secrets_dir would find them if not ignored + monkeypatch.setattr(settings, "SYSTEM_SECRETS_DIR", tmp_path / SECRETS_DIRNAME) + secrets_dir = tmp_path / SECRETS_DIRNAME + secrets_dir.mkdir() + (secrets_dir / "database.password").write_text("should-not-load") + monkeypatch.setenv("DJ_IGNORE_CONFIG_FILE", "true") + + cfg = settings._create_config() + + assert cfg.database.password is None + + def test_ignore_config_file_default_loads_both(self, monkeypatch, tmp_path): + """Default (env unset) preserves today's behavior.""" + self._isolate_filesystem(monkeypatch, tmp_path) + (tmp_path / CONFIG_FILENAME).write_text('{"database": {"host": "from-file"}}') + secrets_dir = tmp_path / SECRETS_DIRNAME + secrets_dir.mkdir() + (secrets_dir / "database.user").write_text("dbuser") + monkeypatch.delenv("DJ_IGNORE_CONFIG_FILE", raising=False) + + cfg = settings._create_config() + + assert cfg.database.host == "from-file" + assert cfg.database.user == "dbuser" + + class TestDisplaySettings: """Test display-related settings."""