From f564f1f5def276badfa8ca8cb01a1b35341af357 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 15:25:52 +0200 Subject: [PATCH 01/16] refactor: rename druid_controller module to controller Frees up the `controller/` directory for upcoming dereference and validate submodules, matching the trino-operator layout. No behavior change. --- .../src/{druid_controller.rs => controller.rs} | 0 rust/operator-binary/src/main.rs | 10 +++++----- rust/operator-binary/src/operations/pdb.rs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) rename rust/operator-binary/src/{druid_controller.rs => controller.rs} (100%) diff --git a/rust/operator-binary/src/druid_controller.rs b/rust/operator-binary/src/controller.rs similarity index 100% rename from rust/operator-binary/src/druid_controller.rs rename to rust/operator-binary/src/controller.rs diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index cd74f3dc..f2206e1b 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use anyhow::anyhow; use clap::Parser; -use druid_controller::{DRUID_CONTROLLER_NAME, FULL_CONTROLLER_NAME}; +use controller::{DRUID_CONTROLLER_NAME, FULL_CONTROLLER_NAME}; use futures::{FutureExt, StreamExt, TryFutureExt}; use stackable_operator::{ YamlSchema, @@ -41,7 +41,7 @@ mod authentication; mod config; mod crd; mod discovery; -mod druid_controller; +mod controller; mod extensions; mod internal_secret; mod listener; @@ -161,9 +161,9 @@ async fn main() -> anyhow::Result<()> { ) .graceful_shutdown_on(sigterm_watcher.handle()) .run( - druid_controller::reconcile_druid, - druid_controller::error_policy, - Arc::new(druid_controller::Ctx { + controller::reconcile_druid, + controller::error_policy, + Arc::new(controller::Ctx { client: client.clone(), operator_environment, product_config, diff --git a/rust/operator-binary/src/operations/pdb.rs b/rust/operator-binary/src/operations/pdb.rs index 4ce3d123..1ceb9e47 100644 --- a/rust/operator-binary/src/operations/pdb.rs +++ b/rust/operator-binary/src/operations/pdb.rs @@ -6,7 +6,7 @@ use stackable_operator::{ use crate::{ crd::{APP_NAME, DruidRole, OPERATOR_NAME, v1alpha1}, - druid_controller::DRUID_CONTROLLER_NAME, + controller::DRUID_CONTROLLER_NAME, }; #[derive(Snafu, Debug)] From f657e980cdd5d42d2675f896b97bd7ab9d053bc1 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 15:35:24 +0200 Subject: [PATCH 02/16] refactor: extract dereference step from reconcile Move all pre-loop client I/O (ZK ConfigMap, OPA URL, S3 connection, deep storage bucket, AuthenticationClasses) into a new `controller::dereference` submodule that returns `DereferencedObjects`. Matches the trino-operator pattern (trino-operator commit 7004062). No behavior change. --- rust/operator-binary/src/controller.rs | 131 +++------------- .../src/controller/dereference.rs | 148 ++++++++++++++++++ 2 files changed, 171 insertions(+), 108 deletions(-) create mode 100644 rust/operator-binary/src/controller/dereference.rs diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index d7b7db8a..0dea9553 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -13,7 +13,7 @@ use product_config::{ types::PropertyNameKind, writer::{PropertiesWriterError, to_java_properties_string}, }; -use snafu::{OptionExt, ResultExt, Snafu}; +use snafu::{ResultExt, Snafu}; use stackable_operator::{ builder::{ self, @@ -27,7 +27,6 @@ use stackable_operator::{ cli::OperatorEnvironmentOptions, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ - opa::OpaApiVersion, product_image_selection::{self, ResolvedProductImage}, rbac::build_rbac_resources, }, @@ -77,7 +76,6 @@ use crate::{ LOG_CONFIG_DIRECTORY, MAX_DRUID_LOG_FILES_SIZE, METRICS_PORT, METRICS_PORT_NAME, OPERATOR_NAME, RUNTIME_PROPS, RW_CONFIG_DIRECTORY, S3_ACCESS_KEY, S3_ENDPOINT_URL, S3_PATH_STYLE_ACCESS, S3_SECRET_KEY, STACKABLE_LOG_DIR, ZOOKEEPER_CONNECTION_STRING, - authentication::AuthenticationClassesResolved, authorization::DruidAuthorization, build_recommended_labels, build_string_list, security::DruidTlsSecurity, v1alpha1, }, discovery::{self, build_discovery_configmaps}, @@ -92,6 +90,8 @@ use crate::{ service::{build_rolegroup_headless_service, build_rolegroup_metrics_service}, }; +mod dereference; + pub const DRUID_CONTROLLER_NAME: &str = "druidcluster"; pub const FULL_CONTROLLER_NAME: &str = concatcp!(DRUID_CONTROLLER_NAME, '.', OPERATOR_NAME); @@ -154,43 +154,14 @@ pub enum Error { source: stackable_operator::builder::meta::Error, }, - #[snafu(display( - "failed to get ZooKeeper discovery config map for cluster: {}", - cm_name - ))] - GetZookeeperConnStringConfigMap { - source: stackable_operator::client::Error, - cm_name: String, - }, - - #[snafu(display( - "failed to get OPA discovery config map and/or connection string for cluster: {}", - cm_name - ))] - GetOpaConnString { - source: stackable_operator::commons::opa::Error, - cm_name: String, - }, - - #[snafu(display("failed to get valid S3 connection"))] - GetS3Connection { source: crate::crd::Error }, + #[snafu(display("failed to dereference cluster objects"))] + Dereference { source: dereference::Error }, #[snafu(display("failed to configure S3 connection"))] ConfigureS3 { source: stackable_operator::crd::s3::v1alpha1::ConnectionError, }, - #[snafu(display("failed to get deep storage bucket"))] - GetDeepStorageBucket { - source: stackable_operator::crd::s3::v1alpha1::BucketError, - }, - - #[snafu(display( - "failed to get ZooKeeper connection string from config map {}", - cm_name - ))] - MissingZookeeperConnString { cm_name: String }, - #[snafu(display("failed to transform configs"))] ProductConfigTransform { source: stackable_operator::product_config_utils::Error, @@ -245,17 +216,9 @@ pub enum Error { name: String, }, - #[snafu(display("object defines no namespace"))] - ObjectHasNoNamespace, - #[snafu(display("failed to initialize security context"))] FailedToInitializeSecurityContext { source: crate::crd::security::Error }, - #[snafu(display("failed to retrieve AuthenticationClass"))] - AuthenticationClassRetrieval { - source: crate::crd::authentication::Error, - }, - #[snafu(display("failed to get JVM config"))] GetJvmConfig { source: crate::config::jvm::Error }, @@ -394,11 +357,11 @@ pub async fn reconcile_druid( .context(InvalidDruidClusterSnafu)?; let client = &ctx.client; - let namespace = &druid - .metadata - .namespace - .clone() - .with_context(|| ObjectHasNoNamespaceSnafu {})?; + + let dereferenced_objects = dereference::dereference(client, druid) + .await + .context(DereferenceSnafu)?; + let resolved_product_image = druid .spec .image @@ -409,63 +372,15 @@ pub async fn reconcile_druid( ) .context(ResolveProductImageSnafu)?; - let zk_confmap = druid.spec.cluster_config.zookeeper_config_map_name.clone(); - let zk_connstr = client - .get::(&zk_confmap, namespace) - .await - .context(GetZookeeperConnStringConfigMapSnafu { - cm_name: zk_confmap.clone(), - })? - .data - .and_then(|mut data| data.remove("ZOOKEEPER")) - .context(MissingZookeeperConnStringSnafu { - cm_name: zk_confmap.clone(), - })?; + let druid_tls_security = DruidTlsSecurity::new_from_druid_cluster( + druid, + &dereferenced_objects.resolved_authentication_classes, + ); - // Assemble the OPA connection string from the discovery and the given path, if a spec is given. - let opa_connstr = if let Some(DruidAuthorization { opa: opa_config }) = - &druid.spec.cluster_config.authorization - { - Some( - opa_config - .full_document_url_from_config_map(client, druid, Some("allow"), &OpaApiVersion::V1) - .await - .context(GetOpaConnStringSnafu { - cm_name: opa_config.config_map_name.clone(), - })?, + let druid_auth_config = + DruidAuthenticationConfig::try_from( + dereferenced_objects.resolved_authentication_classes.clone(), ) - } else { - None - }; - - // Get the s3 connection if one is defined - let s3_conn = druid - .get_s3_connection(client) - .await - .context(GetS3ConnectionSnafu)?; - - let deep_storage_bucket_name = match &druid.spec.cluster_config.deep_storage { - DeepStorageSpec::S3(s3_spec) => Some( - s3_spec - .bucket - .clone() - .resolve(client, namespace) - .await - .context(GetDeepStorageBucketSnafu)? - .bucket_name, - ), - _ => None, - }; - - let resolved_auth_classes = - AuthenticationClassesResolved::from(&druid.spec.cluster_config, client) - .await - .context(AuthenticationClassRetrievalSnafu)?; - - let druid_tls_security = - DruidTlsSecurity::new_from_druid_cluster(druid, &resolved_auth_classes); - - let druid_auth_config = DruidAuthenticationConfig::try_from(resolved_auth_classes) .context(InvalidDruidAuthenticationConfigSnafu)?; let role_config = transform_all_roles_to_config(druid, &druid.build_role_properties()); @@ -569,10 +484,10 @@ pub async fn reconcile_druid( &rolegroup, rolegroup_config, &merged_rolegroup_config, - &zk_connstr, - opa_connstr.as_deref(), - s3_conn.as_ref(), - deep_storage_bucket_name.as_deref(), + &dereferenced_objects.zookeeper_connection_string, + dereferenced_objects.opa_connection_string.as_deref(), + dereferenced_objects.s3_connection.as_ref(), + dereferenced_objects.deep_storage_bucket_name.as_deref(), &druid_tls_security, &druid_auth_config, )?; @@ -583,7 +498,7 @@ pub async fn reconcile_druid( &rolegroup, rolegroup_config, &merged_rolegroup_config, - s3_conn.as_ref(), + dereferenced_objects.s3_connection.as_ref(), &druid_tls_security, &druid_auth_config, &rbac_sa, @@ -1389,7 +1304,7 @@ mod test { use rstest::*; use super::*; - use crate::crd::PROP_SEGMENT_CACHE_LOCATIONS; + use crate::crd::{PROP_SEGMENT_CACHE_LOCATIONS, authentication::AuthenticationClassesResolved}; #[derive(Snafu, Debug, EnumDiscriminants)] #[strum_discriminants(derive(IntoStaticStr))] diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs new file mode 100644 index 00000000..37abde0f --- /dev/null +++ b/rust/operator-binary/src/controller/dereference.rs @@ -0,0 +1,148 @@ +//! The dereference step in the DruidCluster controller +//! +//! Fetches all Kubernetes objects referenced by the DruidCluster spec and returns them in +//! [`DereferencedObjects`]. The helpers called here (`AuthenticationClassesResolved::from`, +//! `DruidCluster::get_s3_connection`, `S3Bucket::resolve`, +//! `OpaConfig::full_document_url_from_config_map`) currently mix fetching and validation; +//! their outputs are treated as "dereferenced" for now. Splitting those helpers is a +//! follow-up. + +use snafu::{OptionExt, ResultExt, Snafu}; +use stackable_operator::{ + client::Client, + commons::opa::OpaApiVersion, + crd::s3, + k8s_openapi::api::core::v1::ConfigMap, +}; + +use crate::crd::{ + DeepStorageSpec, authentication::AuthenticationClassesResolved, + authorization::DruidAuthorization, v1alpha1, +}; + +#[derive(Snafu, Debug)] +#[allow(clippy::enum_variant_names)] +pub enum Error { + #[snafu(display("object defines no namespace"))] + ObjectHasNoNamespace, + + #[snafu(display( + "failed to get ZooKeeper discovery config map for cluster: {}", + cm_name + ))] + GetZookeeperConnStringConfigMap { + source: stackable_operator::client::Error, + cm_name: String, + }, + + #[snafu(display( + "failed to get ZooKeeper connection string from config map {}", + cm_name + ))] + MissingZookeeperConnString { cm_name: String }, + + #[snafu(display( + "failed to get OPA discovery config map and/or connection string for cluster: {}", + cm_name + ))] + GetOpaConnString { + source: stackable_operator::commons::opa::Error, + cm_name: String, + }, + + #[snafu(display("failed to get valid S3 connection"))] + GetS3Connection { source: crate::crd::Error }, + + #[snafu(display("failed to get deep storage bucket"))] + GetDeepStorageBucket { + source: stackable_operator::crd::s3::v1alpha1::BucketError, + }, + + #[snafu(display("failed to retrieve AuthenticationClass"))] + AuthenticationClassRetrieval { + source: crate::crd::authentication::Error, + }, +} + +type Result = std::result::Result; + +/// Kubernetes objects referenced from the DruidCluster spec, already fetched (and, for now, +/// partly validated by the existing helper functions). +pub struct DereferencedObjects { + pub zookeeper_connection_string: String, + pub opa_connection_string: Option, + pub s3_connection: Option, + pub deep_storage_bucket_name: Option, + pub resolved_authentication_classes: AuthenticationClassesResolved, +} + +/// Fetches all Kubernetes objects referenced from the [`v1alpha1::DruidCluster`] spec. +pub async fn dereference( + client: &Client, + druid: &v1alpha1::DruidCluster, +) -> Result { + let namespace = druid + .metadata + .namespace + .as_deref() + .context(ObjectHasNoNamespaceSnafu)?; + + let zk_confmap = druid.spec.cluster_config.zookeeper_config_map_name.clone(); + let zookeeper_connection_string = client + .get::(&zk_confmap, namespace) + .await + .context(GetZookeeperConnStringConfigMapSnafu { + cm_name: zk_confmap.clone(), + })? + .data + .and_then(|mut data| data.remove("ZOOKEEPER")) + .context(MissingZookeeperConnStringSnafu { + cm_name: zk_confmap.clone(), + })?; + + let opa_connection_string = if let Some(DruidAuthorization { opa: opa_config }) = + &druid.spec.cluster_config.authorization + { + Some( + opa_config + .full_document_url_from_config_map(client, druid, Some("allow"), &OpaApiVersion::V1) + .await + .context(GetOpaConnStringSnafu { + cm_name: opa_config.config_map_name.clone(), + })?, + ) + } else { + None + }; + + let s3_connection = druid + .get_s3_connection(client) + .await + .context(GetS3ConnectionSnafu)?; + + let deep_storage_bucket_name = match &druid.spec.cluster_config.deep_storage { + DeepStorageSpec::S3(s3_spec) => Some( + s3_spec + .bucket + .clone() + .resolve(client, namespace) + .await + .context(GetDeepStorageBucketSnafu)? + .bucket_name, + ), + _ => None, + }; + + let resolved_authentication_classes = + AuthenticationClassesResolved::from(&druid.spec.cluster_config, client) + .await + .context(AuthenticationClassRetrievalSnafu)?; + + Ok(DereferencedObjects { + zookeeper_connection_string, + opa_connection_string, + s3_connection, + deep_storage_bucket_name, + resolved_authentication_classes, + }) +} From 9f2cb8be80a71b71d4ff30cc37bba5345e32e74a Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 15:49:54 +0200 Subject: [PATCH 03/16] refactor: extract validate step from reconcile Move pure validation (product image resolution, TLS security construction, authentication config conversion, product-config validation) into a new `controller::validate` submodule that returns `ValidatedInputs`. Matches the trino-operator pattern (trino-operator commit 7004062). No behavior change. Co-Authored-By: Claude Sonnet 4.6 --- rust/operator-binary/src/controller.rs | 87 +++++---------- .../src/controller/validate.rs | 101 ++++++++++++++++++ 2 files changed, 126 insertions(+), 62 deletions(-) create mode 100644 rust/operator-binary/src/controller/validate.rs diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 0dea9553..7a3e2a16 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -27,7 +27,7 @@ use stackable_operator::{ cli::OperatorEnvironmentOptions, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, commons::{ - product_image_selection::{self, ResolvedProductImage}, + product_image_selection::ResolvedProductImage, rbac::build_rbac_resources, }, constants::RESTART_CONTROLLER_ENABLED_LABEL, @@ -48,7 +48,6 @@ use stackable_operator::{ }, kvp::{KeyValuePairError, LabelError, LabelValueError, Labels}, logging::controller::ReconcilerError, - product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ self, framework::LoggingError, @@ -91,11 +90,12 @@ use crate::{ }; mod dereference; +mod validate; pub const DRUID_CONTROLLER_NAME: &str = "druidcluster"; pub const FULL_CONTROLLER_NAME: &str = concatcp!(DRUID_CONTROLLER_NAME, '.', OPERATOR_NAME); -const CONTAINER_IMAGE_BASE_NAME: &str = "druid"; +pub(super) const CONTAINER_IMAGE_BASE_NAME: &str = "druid"; // volume names const DRUID_CONFIG_VOLUME_NAME: &str = "config"; @@ -139,16 +139,6 @@ pub enum Error { rolegroup: RoleGroupRef, }, - #[snafu(display("invalid product configuration"))] - InvalidProductConfig { - source: stackable_operator::product_config_utils::Error, - }, - - #[snafu(display("invalid authentication configuration"))] - InvalidDruidAuthenticationConfig { - source: crate::authentication::Error, - }, - #[snafu(display("object is missing metadata to build owner reference"))] ObjectMissingMetadataForOwnerRef { source: stackable_operator::builder::meta::Error, @@ -162,11 +152,6 @@ pub enum Error { source: stackable_operator::crd::s3::v1alpha1::ConnectionError, }, - #[snafu(display("failed to transform configs"))] - ProductConfigTransform { - source: stackable_operator::product_config_utils::Error, - }, - #[snafu(display("failed to format runtime properties"))] PropertiesWriteError { source: PropertiesWriterError }, @@ -326,10 +311,8 @@ pub enum Error { #[snafu(display("failed to configure service"))] ServiceConfiguration { source: crate::service::Error }, - #[snafu(display("failed to resolve product image"))] - ResolveProductImage { - source: product_image_selection::Error, - }, + #[snafu(display("failed to validate cluster"))] + ValidateCluster { source: validate::Error }, #[snafu(display("invalid metadata database connection"))] InvalidMetadataDatabaseConnection { @@ -362,36 +345,13 @@ pub async fn reconcile_druid( .await .context(DereferenceSnafu)?; - let resolved_product_image = druid - .spec - .image - .resolve( - CONTAINER_IMAGE_BASE_NAME, - &ctx.operator_environment.image_repository, - crate::built_info::PKG_VERSION, - ) - .context(ResolveProductImageSnafu)?; - - let druid_tls_security = DruidTlsSecurity::new_from_druid_cluster( + let validated = validate::validate( druid, - &dereferenced_objects.resolved_authentication_classes, - ); - - let druid_auth_config = - DruidAuthenticationConfig::try_from( - dereferenced_objects.resolved_authentication_classes.clone(), - ) - .context(InvalidDruidAuthenticationConfigSnafu)?; - - let role_config = transform_all_roles_to_config(druid, &druid.build_role_properties()); - let validated_role_config = validate_all_roles_and_groups_config( - &resolved_product_image.product_version, - &role_config.context(ProductConfigTransformSnafu)?, + &dereferenced_objects, + &ctx.operator_environment, &ctx.product_config, - false, - false, ) - .context(InvalidProductConfigSnafu)?; + .context(ValidateClusterSnafu)?; let mut cluster_resources = ClusterResources::new( APP_NAME, @@ -425,7 +385,7 @@ pub async fn reconcile_druid( let mut ss_cond_builder = StatefulSetConditionBuilder::default(); - for (role_name, role_config) in validated_role_config.iter() { + for (role_name, role_config) in validated.validated_role_config.iter() { let druid_role = DruidRole::from_str(role_name).context(UnidentifiedDruidRoleSnafu { role: role_name.to_string(), })?; @@ -448,7 +408,7 @@ pub async fn reconcile_druid( let role_group_service_recommended_labels = build_recommended_labels( druid, DRUID_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, + &validated.resolved_product_image.app_version_label_value, &rolegroup.role, &rolegroup.role_group, ); @@ -463,7 +423,7 @@ pub async fn reconcile_druid( let rg_headless_service = build_rolegroup_headless_service( druid, - &druid_tls_security, + &validated.druid_tls_security, &druid_role, &rolegroup, role_group_service_recommended_labels.clone(), @@ -480,7 +440,7 @@ pub async fn reconcile_druid( let rg_configmap = build_rolegroup_config_map( druid, - &resolved_product_image, + &validated.resolved_product_image, &rolegroup, rolegroup_config, &merged_rolegroup_config, @@ -488,19 +448,19 @@ pub async fn reconcile_druid( dereferenced_objects.opa_connection_string.as_deref(), dereferenced_objects.s3_connection.as_ref(), dereferenced_objects.deep_storage_bucket_name.as_deref(), - &druid_tls_security, - &druid_auth_config, + &validated.druid_tls_security, + &validated.druid_auth_config, )?; let rg_statefulset = build_rolegroup_statefulset( druid, - &resolved_product_image, + &validated.resolved_product_image, &druid_role, &rolegroup, rolegroup_config, &merged_rolegroup_config, dereferenced_objects.s3_connection.as_ref(), - &druid_tls_security, - &druid_auth_config, + &validated.druid_tls_security, + &validated.druid_auth_config, &rbac_sa, )?; @@ -543,14 +503,14 @@ pub async fn reconcile_druid( build_recommended_labels( druid, DRUID_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, + &validated.resolved_product_image.app_version_label_value, role_name, "none", ), listener_class.to_string(), listener_group_name, &druid_role, - &druid_tls_security, + &validated.druid_tls_security, ) .context(ListenerConfigurationSnafu)?; @@ -564,8 +524,8 @@ pub async fn reconcile_druid( for discovery_cm in build_discovery_configmaps( druid, druid, - &resolved_product_image, - &druid_tls_security, + &validated.resolved_product_image, + &validated.druid_tls_security, listener, ) .await @@ -1302,6 +1262,9 @@ pub fn error_policy( mod test { use product_config::{ProductConfigManager, writer}; use rstest::*; + use stackable_operator::product_config_utils::{ + transform_all_roles_to_config, validate_all_roles_and_groups_config, + }; use super::*; use crate::crd::{PROP_SEGMENT_CACHE_LOCATIONS, authentication::AuthenticationClassesResolved}; diff --git a/rust/operator-binary/src/controller/validate.rs b/rust/operator-binary/src/controller/validate.rs new file mode 100644 index 00000000..7fe0b0b3 --- /dev/null +++ b/rust/operator-binary/src/controller/validate.rs @@ -0,0 +1,101 @@ +//! The validate step in the DruidCluster controller +//! +//! Synchronously validates inputs that don't require a Kubernetes client. Produces +//! [`ValidatedInputs`], consumed by the rest of `reconcile_druid`. + +use product_config::ProductConfigManager; +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + cli::OperatorEnvironmentOptions, + commons::product_image_selection::{self, ResolvedProductImage}, + product_config_utils::{ + ValidatedRoleConfigByPropertyKind, transform_all_roles_to_config, + validate_all_roles_and_groups_config, + }, +}; + +use crate::{ + authentication::DruidAuthenticationConfig, + controller::dereference::DereferencedObjects, + crd::{security::DruidTlsSecurity, v1alpha1}, +}; + +#[derive(Snafu, Debug)] +#[allow(clippy::enum_variant_names)] +pub enum Error { + #[snafu(display("failed to resolve product image"))] + ResolveProductImage { + source: product_image_selection::Error, + }, + + #[snafu(display("invalid authentication configuration"))] + InvalidDruidAuthenticationConfig { + source: crate::authentication::Error, + }, + + #[snafu(display("failed to transform configs"))] + ProductConfigTransform { + source: stackable_operator::product_config_utils::Error, + }, + + #[snafu(display("invalid product configuration"))] + InvalidProductConfig { + source: stackable_operator::product_config_utils::Error, + }, +} + +type Result = std::result::Result; + +/// Synchronous inputs the rest of `reconcile_druid` needs after dereferencing. +pub struct ValidatedInputs { + pub resolved_product_image: ResolvedProductImage, + pub druid_tls_security: DruidTlsSecurity, + pub druid_auth_config: Option, + pub validated_role_config: ValidatedRoleConfigByPropertyKind, +} + +/// Validates the cluster spec and the dereferenced inputs. +pub fn validate( + druid: &v1alpha1::DruidCluster, + dereferenced_objects: &DereferencedObjects, + operator_environment: &OperatorEnvironmentOptions, + product_config: &ProductConfigManager, +) -> Result { + let resolved_product_image = druid + .spec + .image + .resolve( + super::CONTAINER_IMAGE_BASE_NAME, + &operator_environment.image_repository, + crate::built_info::PKG_VERSION, + ) + .context(ResolveProductImageSnafu)?; + + let druid_tls_security = DruidTlsSecurity::new_from_druid_cluster( + druid, + &dereferenced_objects.resolved_authentication_classes, + ); + + let druid_auth_config = DruidAuthenticationConfig::try_from( + dereferenced_objects.resolved_authentication_classes.clone(), + ) + .context(InvalidDruidAuthenticationConfigSnafu)?; + + let role_config = transform_all_roles_to_config(druid, &druid.build_role_properties()) + .context(ProductConfigTransformSnafu)?; + let validated_role_config = validate_all_roles_and_groups_config( + &resolved_product_image.product_version, + &role_config, + product_config, + false, + false, + ) + .context(InvalidProductConfigSnafu)?; + + Ok(ValidatedInputs { + resolved_product_image, + druid_tls_security, + druid_auth_config, + validated_role_config, + }) +} From 3def6feede8373360feeffd8456ddab9531be572 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 17:09:45 +0200 Subject: [PATCH 04/16] docs: update stale druid_controller.rs reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the druid_controller → controller module rename. --- rust/operator-binary/src/crd/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 332db249..11901b6e 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -420,7 +420,7 @@ impl v1alpha1::DruidCluster { AUTH_AUTHORIZER_OPA_TYPE.to_string(), Some(AUTH_AUTHORIZER_OPA_TYPE_VALUE.to_string()), ); - // The opaUri still needs to be set, but that requires a discovery config map and is handled in the druid_controller.rs + // The opaUri still needs to be set, but that requires a discovery config map and is handled in the controller.rs } // deep storage result.insert( From e8683e3ce8c9b48bc7cd62750d1e83bbf6f4ab79 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 17:11:07 +0200 Subject: [PATCH 05/16] fix: run fmt --- rust/operator-binary/src/controller.rs | 5 +---- rust/operator-binary/src/controller/dereference.rs | 5 +---- rust/operator-binary/src/main.rs | 2 +- rust/operator-binary/src/operations/pdb.rs | 2 +- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/rust/operator-binary/src/controller.rs b/rust/operator-binary/src/controller.rs index 7a3e2a16..2f6135e4 100644 --- a/rust/operator-binary/src/controller.rs +++ b/rust/operator-binary/src/controller.rs @@ -26,10 +26,7 @@ use stackable_operator::{ }, cli::OperatorEnvironmentOptions, cluster_resources::{ClusterResourceApplyStrategy, ClusterResources}, - commons::{ - product_image_selection::ResolvedProductImage, - rbac::build_rbac_resources, - }, + commons::{product_image_selection::ResolvedProductImage, rbac::build_rbac_resources}, constants::RESTART_CONTROLLER_ENABLED_LABEL, crd::s3, database_connections::drivers::jdbc::JdbcDatabaseConnection as _, diff --git a/rust/operator-binary/src/controller/dereference.rs b/rust/operator-binary/src/controller/dereference.rs index 37abde0f..bd4098a8 100644 --- a/rust/operator-binary/src/controller/dereference.rs +++ b/rust/operator-binary/src/controller/dereference.rs @@ -9,10 +9,7 @@ use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ - client::Client, - commons::opa::OpaApiVersion, - crd::s3, - k8s_openapi::api::core::v1::ConfigMap, + client::Client, commons::opa::OpaApiVersion, crd::s3, k8s_openapi::api::core::v1::ConfigMap, }; use crate::crd::{ diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index f2206e1b..1176b910 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -39,9 +39,9 @@ use crate::{ mod authentication; mod config; +mod controller; mod crd; mod discovery; -mod controller; mod extensions; mod internal_secret; mod listener; diff --git a/rust/operator-binary/src/operations/pdb.rs b/rust/operator-binary/src/operations/pdb.rs index 1ceb9e47..da25223c 100644 --- a/rust/operator-binary/src/operations/pdb.rs +++ b/rust/operator-binary/src/operations/pdb.rs @@ -5,8 +5,8 @@ use stackable_operator::{ }; use crate::{ - crd::{APP_NAME, DruidRole, OPERATOR_NAME, v1alpha1}, controller::DRUID_CONTROLLER_NAME, + crd::{APP_NAME, DruidRole, OPERATOR_NAME, v1alpha1}, }; #[derive(Snafu, Debug)] From 70ad1098a10b31b6d257a87920ce60da1ac30156 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 18:10:16 +0200 Subject: [PATCH 06/16] refactor(tests): scope 50-assert to cluster readiness only Move the StatefulSet and PodDisruptionBudget shape assertions into the upcoming 52-assert (resource shape) so that 50-assert mirrors trino-operator's 10-assert (readiness-only). The duplicated broker PDB and missing coordinator PDB will be corrected in 52-assert. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/50-assert.yaml | 139 --------------------- 1 file changed, 139 deletions(-) diff --git a/tests/templates/kuttl/smoke/50-assert.yaml b/tests/templates/kuttl/smoke/50-assert.yaml index f86362f7..97602e31 100644 --- a/tests/templates/kuttl/smoke/50-assert.yaml +++ b/tests/templates/kuttl/smoke/50-assert.yaml @@ -4,142 +4,3 @@ kind: TestAssert timeout: 600 commands: - script: kubectl -n $NAMESPACE wait --for=condition=available druidclusters.druid.stackable.tech/druid --timeout 301s ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: druid-broker-default - labels: - restarter.stackable.tech/enabled: "true" -spec: - template: - spec: - terminationGracePeriodSeconds: 1 -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: druid-coordinator-default - labels: - restarter.stackable.tech/enabled: "true" -spec: - template: - spec: - terminationGracePeriodSeconds: 1 -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: druid-historical-default - labels: - restarter.stackable.tech/enabled: "true" -spec: - template: - spec: - terminationGracePeriodSeconds: 1 - volumes: - - name: tls-mount - ephemeral: - volumeClaimTemplate: - metadata: - annotations: - secrets.stackable.tech/class: tls - spec: - storageClassName: secrets.stackable.tech - - name: tls - emptyDir: {} - - name: config - configMap: - name: druid-historical-default - - name: rwconfig - emptyDir: {} - - name: log-config - - name: log - - name: hdfs - configMap: - name: druid-hdfs - - name: segment-cache - emptyDir: - sizeLimit: 1G -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: druid-middlemanager-default - labels: - restarter.stackable.tech/enabled: "true" -spec: - template: - spec: - terminationGracePeriodSeconds: 1 -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: druid-router-default - labels: - restarter.stackable.tech/enabled: "true" -spec: - template: - spec: - terminationGracePeriodSeconds: 1 -status: - readyReplicas: 1 - replicas: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: druid-broker -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: druid-historical -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: druid-broker -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: druid-middlemanager -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: druid-router -status: - expectedPods: 1 - currentHealthy: 1 - disruptionsAllowed: 1 From 1c504fa7bab1a04122ff39dd811ed35d65399f76 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 18:28:47 +0200 Subject: [PATCH 07/16] test(smoke): add 52-assert shape checks for druid resources Declarative shape assertions for every operator-managed druid resource in the smoke test (5 StatefulSets, 3 ClusterIP + 5 headless Services, 3 Listeners, 5 PDBs, the shared internal Secret, the ServiceAccount, and the RoleBinding). Mirrors trino's 13-assert and kafka's 33-assert.yaml.j2 patterns. The duplicative *-metrics Services are intentionally not asserted. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/52-assert.yaml.j2 | 492 ++++++++++++++++++ 1 file changed, 492 insertions(+) create mode 100644 tests/templates/kuttl/smoke/52-assert.yaml.j2 diff --git a/tests/templates/kuttl/smoke/52-assert.yaml.j2 b/tests/templates/kuttl/smoke/52-assert.yaml.j2 new file mode 100644 index 00000000..4c7fa35a --- /dev/null +++ b/tests/templates/kuttl/smoke/52-assert.yaml.j2 @@ -0,0 +1,492 @@ +{# Templating flag used throughout this file. #} +{% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} +--- +# Declarative shape assertions for every druid-managed resource in the smoke +# test except ConfigMap *.data* (covered in 53-assert.yaml.j2) and the +# duplicative *-metrics Services. +# +# kuttl performs subset matching: any field omitted here is not checked, and +# the live object may carry additional keys/labels. We therefore omit fields +# that are random per install (uids, resourceVersion, clusterIP) and per- +# container `resources`/`env` (kuttl matches array elements positionally and +# these are mostly default values). +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 600 +--- +# ----- StatefulSets (5) ----- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-broker-default + labels: + app.kubernetes.io/component: broker + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + restarter.stackable.tech/enabled: "true" + stackable.tech/vendor: Stackable + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: broker + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + serviceName: druid-broker-default-headless + template: + spec: + serviceAccount: druid-serviceaccount + serviceAccountName: druid-serviceaccount + terminationGracePeriodSeconds: 1 +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-coordinator-default + labels: + app.kubernetes.io/component: coordinator + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + restarter.stackable.tech/enabled: "true" + stackable.tech/vendor: Stackable + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: coordinator + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + serviceName: druid-coordinator-default-headless + template: + spec: + serviceAccount: druid-serviceaccount + serviceAccountName: druid-serviceaccount + terminationGracePeriodSeconds: 1 +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-historical-default + labels: + app.kubernetes.io/component: historical + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + restarter.stackable.tech/enabled: "true" + stackable.tech/vendor: Stackable + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: historical + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + serviceName: druid-historical-default-headless + template: + spec: + serviceAccount: druid-serviceaccount + serviceAccountName: druid-serviceaccount + terminationGracePeriodSeconds: 1 + volumes: + - name: tls-mount + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: tls + spec: + storageClassName: secrets.stackable.tech + - name: tls + emptyDir: {} + - name: config + configMap: + name: druid-historical-default + - name: rwconfig + emptyDir: {} + - name: log-config + - name: log + - name: hdfs + configMap: + name: druid-hdfs + - name: segment-cache + emptyDir: + sizeLimit: 1G +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-middlemanager-default + labels: + app.kubernetes.io/component: middlemanager + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + restarter.stackable.tech/enabled: "true" + stackable.tech/vendor: Stackable + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: middlemanager + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + serviceName: druid-middlemanager-default-headless + template: + spec: + serviceAccount: druid-serviceaccount + serviceAccountName: druid-serviceaccount + terminationGracePeriodSeconds: 1 +status: + readyReplicas: 1 + replicas: 1 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: druid-router-default + labels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + restarter.stackable.tech/enabled: "true" + stackable.tech/vendor: Stackable + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default + serviceName: druid-router-default-headless + template: + spec: + serviceAccount: druid-serviceaccount + serviceAccountName: druid-serviceaccount + terminationGracePeriodSeconds: 1 +status: + readyReplicas: 1 + replicas: 1 +--- +# ----- ClusterIP Services (broker / coordinator / router) ----- +# These are owned by the Listener (not DruidCluster), so ownerReferences are +# intentionally omitted here to avoid false-negative subset-match failures. +apiVersion: v1 +kind: Service +metadata: + name: druid-broker +spec: + ports: + - name: https + port: 8282 + protocol: TCP + targetPort: 8282 + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-coordinator +spec: + ports: + - name: https + port: 8281 + protocol: TCP + targetPort: 8281 + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-router +spec: + ports: + - name: https + port: 9088 + protocol: TCP + targetPort: 9088 + type: ClusterIP +--- +# ----- Headless Services (one per role-group) ----- +apiVersion: v1 +kind: Service +metadata: + name: druid-broker-default-headless + labels: + app.kubernetes.io/component: broker + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default +spec: + clusterIP: None + ports: + - name: https + port: 8282 + protocol: TCP + targetPort: 8282 + publishNotReadyAddresses: true + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-coordinator-default-headless + labels: + app.kubernetes.io/component: coordinator + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default +spec: + clusterIP: None + ports: + - name: https + port: 8281 + protocol: TCP + targetPort: 8281 + publishNotReadyAddresses: true + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-historical-default-headless + labels: + app.kubernetes.io/component: historical + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default +spec: + clusterIP: None + ports: + - name: https + port: 8283 + protocol: TCP + targetPort: 8283 + publishNotReadyAddresses: true + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-middlemanager-default-headless + labels: + app.kubernetes.io/component: middlemanager + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default +spec: + clusterIP: None + ports: + - name: https + port: 8291 + protocol: TCP + targetPort: 8291 + publishNotReadyAddresses: true + type: ClusterIP +--- +apiVersion: v1 +kind: Service +metadata: + name: druid-router-default-headless + labels: + app.kubernetes.io/component: router + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default +spec: + clusterIP: None + ports: + - name: https + port: 9088 + protocol: TCP + targetPort: 9088 + publishNotReadyAddresses: true + type: ClusterIP +--- +# ----- Listeners (broker / coordinator / router) ----- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: Listener +metadata: + name: druid-broker + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + className: cluster-internal + ports: + - name: https + port: 8282 + protocol: TCP +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: Listener +metadata: + name: druid-coordinator + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + className: cluster-internal + ports: + - name: https + port: 8281 + protocol: TCP +--- +apiVersion: listeners.stackable.tech/v1alpha1 +kind: Listener +metadata: + name: druid-router + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +spec: + className: cluster-internal + ports: + - name: https + port: 9088 + protocol: TCP +--- +# ----- PodDisruptionBudgets (5) ----- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: druid-broker +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: druid-coordinator +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: druid-historical +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: druid-middlemanager +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +--- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: druid-router +status: + expectedPods: 1 + currentHealthy: 1 + disruptionsAllowed: 1 +--- +# ----- Secret / ServiceAccount / RoleBinding ----- +apiVersion: v1 +kind: Secret +metadata: + name: druid-shared-internal-secret + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +type: Opaque +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: druid-serviceaccount + labels: + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid + ownerReferences: + - apiVersion: druid.stackable.tech/v1alpha1 + controller: true + kind: DruidCluster + name: druid +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: druid-rolebinding + labels: + app.kubernetes.io/instance: druid + app.kubernetes.io/managed-by: druid.stackable.tech_druidcluster + app.kubernetes.io/name: druid +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: druid-clusterrole +subjects: + - kind: ServiceAccount + name: druid-serviceaccount From 9f83a015079f79a16c0aa2a9eca7f3a2c1118f8c Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 18:51:51 +0200 Subject: [PATCH 08/16] test(smoke): tighten 52-assert volume + selector assertions Fill in the bare log-config/log volume entries on the historical StatefulSet so the assertion matches the live shape, and add the selector blocks on the five headless Services (mirrors the trino/kafka equivalents). Also clarify the vector_enabled comment. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/52-assert.yaml.j2 | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/templates/kuttl/smoke/52-assert.yaml.j2 b/tests/templates/kuttl/smoke/52-assert.yaml.j2 index 4c7fa35a..0148c335 100644 --- a/tests/templates/kuttl/smoke/52-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/52-assert.yaml.j2 @@ -1,4 +1,6 @@ -{# Templating flag used throughout this file. #} +{# vector_enabled is consumed by 53-assert.yaml.j2 conditionals; declared here + too so future container-level (vector sidecar) assertions can use it without + adding the lookup again. #} {% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} --- # Declarative shape assertions for every druid-managed resource in the smoke @@ -133,7 +135,11 @@ spec: - name: rwconfig emptyDir: {} - name: log-config + configMap: + name: druid-historical-default - name: log + emptyDir: + sizeLimit: 30Mi - name: hdfs configMap: name: druid-hdfs @@ -272,6 +278,11 @@ spec: protocol: TCP targetPort: 8282 publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: broker + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default type: ClusterIP --- apiVersion: v1 @@ -292,6 +303,11 @@ spec: protocol: TCP targetPort: 8281 publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: coordinator + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default type: ClusterIP --- apiVersion: v1 @@ -312,6 +328,11 @@ spec: protocol: TCP targetPort: 8283 publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: historical + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default type: ClusterIP --- apiVersion: v1 @@ -332,6 +353,11 @@ spec: protocol: TCP targetPort: 8291 publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: middlemanager + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default type: ClusterIP --- apiVersion: v1 @@ -352,6 +378,11 @@ spec: protocol: TCP targetPort: 9088 publishNotReadyAddresses: true + selector: + app.kubernetes.io/component: router + app.kubernetes.io/instance: druid + app.kubernetes.io/name: druid + app.kubernetes.io/role-group: default type: ClusterIP --- # ----- Listeners (broker / coordinator / router) ----- From c0580f3dc416e87df9b9c1d321d76abe0f5501bf Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 19:04:44 +0200 Subject: [PATCH 09/16] test(smoke): add 53-assert discovery cm + secret snapshot First slice of the ConfigMap snapshot assertion: the discovery ConfigMap and the operator-managed Secret's key presence. The five role-group ConfigMap snapshots follow in the next commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/53-assert.yaml.j2 | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/templates/kuttl/smoke/53-assert.yaml.j2 diff --git a/tests/templates/kuttl/smoke/53-assert.yaml.j2 b/tests/templates/kuttl/smoke/53-assert.yaml.j2 new file mode 100644 index 00000000..8015a8da --- /dev/null +++ b/tests/templates/kuttl/smoke/53-assert.yaml.j2 @@ -0,0 +1,49 @@ +{# vector_enabled gates conditional appender lines added to role-group + ConfigMaps when VECTOR_AGGREGATOR is set in the environment. The five + role-group ConfigMaps (broker/coordinator/historical/middlemanager/router) + are added in a follow-up commit; the flag is declared here for forward + reference. #} +{% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} +--- +# Snapshot the full `.data` of each operator-managed ConfigMap. +# Any code change that alters rendered config values will fail these diffs. +# +# Runs as its own step (after 50/51/52) so kuttl does not re-evaluate the heavy +# heredocs on every 1-second readiness retry of the install step. By this +# point the cluster is in steady state, so each script runs once. +# +# The heredoc is quoted (`<<'YAMLEOF'`) so shell substitution is disabled and +# Java-properties escapes like `${env\:VAR}` survive verbatim. After the +# heredoc, `sed` substitutes `__NAMESPACE__` (kuttl namespaces are random per +# install). For role-group ConfigMaps (added in a follow-up commit) the +# *actual* side is additionally normalized with `s|/znode-[a-f0-9-]+|/znode-__ZNODE__|` +# because the zk operator generates a new znode UUID per install. Both sides +# are then canonicalized to JSON via `yq -o=json`; keys are already +# alphabetical on both sides (operator stores BTreeMap; kubectl serializes +# maps sorted; the heredoc is hand-sorted). +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 60 +commands: + # Operator-managed Secret: verify the data keys exist. Values are random + # per install and are not asserted. + - script: kubectl -n $NAMESPACE get secret druid-shared-internal-secret -o yaml | yq -e '.data | has("INTERNAL_INITIAL_CLIENT_PASSWORD")' + - script: kubectl -n $NAMESPACE get secret druid-shared-internal-secret -o yaml | yq -e '.data | has("OIDC_COOKIE_PASSPHRASE")' + + # Discovery ConfigMap + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + DRUID_AVATICA_JDBC: jdbc:avatica:remote:url=http://druid-router.__NAMESPACE__.svc.cluster.local:9088/druid/v2/sql/avatica/ + DRUID_ROUTER: druid-router.__NAMESPACE__.svc.cluster.local:9088 + DRUID_SQLALCHEMY: druid://druid-router.__NAMESPACE__.svc.cluster.local:9088/druid/v2/sql + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid -o yaml | yq -o=json '.data') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi From df591a202318837ca2673a94190b2286e9ce93b4 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 20:03:20 +0200 Subject: [PATCH 10/16] test(smoke): snapshot all five role-group ConfigMaps Adds full .data snapshots for druid-{broker,coordinator,historical, middlemanager,router}-default ConfigMaps. The actual side is normalized for namespace and the random znode UUID before diffing. The current snapshot was captured with VECTOR_AGGREGATOR unset. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/53-assert.yaml.j2 | 504 +++++++++++++++++- 1 file changed, 497 insertions(+), 7 deletions(-) diff --git a/tests/templates/kuttl/smoke/53-assert.yaml.j2 b/tests/templates/kuttl/smoke/53-assert.yaml.j2 index 8015a8da..044e81b7 100644 --- a/tests/templates/kuttl/smoke/53-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/53-assert.yaml.j2 @@ -1,9 +1,3 @@ -{# vector_enabled gates conditional appender lines added to role-group - ConfigMaps when VECTOR_AGGREGATOR is set in the environment. The five - role-group ConfigMaps (broker/coordinator/historical/middlemanager/router) - are added in a follow-up commit; the flag is declared here for forward - reference. #} -{% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} --- # Snapshot the full `.data` of each operator-managed ConfigMap. # Any code change that alters rendered config values will fail these diffs. @@ -15,7 +9,7 @@ # The heredoc is quoted (`<<'YAMLEOF'`) so shell substitution is disabled and # Java-properties escapes like `${env\:VAR}` survive verbatim. After the # heredoc, `sed` substitutes `__NAMESPACE__` (kuttl namespaces are random per -# install). For role-group ConfigMaps (added in a follow-up commit) the +# install). For role-group ConfigMaps the # *actual* side is additionally normalized with `s|/znode-[a-f0-9-]+|/znode-__ZNODE__|` # because the zk operator generates a new znode UUID per install. Both sides # are then canonicalized to JSON via `yq -o=json`; keys are already @@ -47,3 +41,499 @@ commands: printf '%s\n' "$actual" exit 1 fi + + # Role-group ConfigMap: druid-broker-default + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + jvm.config: |- + -server + -Xmx800m + -Xms800m + -XX:MaxDirectMemorySize=400m + -XX:+ExitOnOutOfMemoryError + -XX:+UseG1GC + -Djava.security.properties=/stackable/rwconfig/security.properties + -Duser.timezone=UTC + -Dfile.encoding=UTF-8 + -Djava.io.tmpdir=/tmp + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager + -Dlog4j.configurationFile=/stackable/rwconfig/log4j2.properties + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 + -Djavax.net.ssl.trustStorePassword=changeit + -Djavax.net.ssl.trustStoreType=pkcs12 + log4j2.properties: |- + appenders = FILE, CONSOLE + + appender.CONSOLE.type = Console + appender.CONSOLE.name = CONSOLE + appender.CONSOLE.target = SYSTEM_ERR + appender.CONSOLE.layout.type = PatternLayout + appender.CONSOLE.layout.pattern = %d{ISO8601} %p [%t] %c - %m%n + appender.CONSOLE.filter.threshold.type = ThresholdFilter + appender.CONSOLE.filter.threshold.level = INFO + + appender.FILE.type = RollingFile + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/druid/druid.log4j2.xml + appender.FILE.filePattern = /stackable/log/druid/druid.log4j2.xml.%i + appender.FILE.layout.type = XMLLayout + appender.FILE.policies.type = Policies + appender.FILE.policies.size.type = SizeBasedTriggeringPolicy + appender.FILE.policies.size.size = 5MB + appender.FILE.strategy.type = DefaultRolloverStrategy + appender.FILE.strategy.max = 1 + appender.FILE.filter.threshold.type = ThresholdFilter + appender.FILE.filter.threshold.level = INFO + + + rootLogger.level=INFO + rootLogger.appenderRefs = CONSOLE, FILE + rootLogger.appenderRef.CONSOLE.ref = CONSOLE + rootLogger.appenderRef.FILE.ref = FILE + runtime.properties: | + druid.auth.basic.ssl.trustStorePassword=changeit + druid.auth.basic.ssl.trustStorePath=/stackable/tls/truststore.p12 + druid.auth.basic.ssl.trustStoreType=pkcs12 + druid.client.https.trustStorePassword=changeit + druid.client.https.trustStorePath=/stackable/tls/truststore.p12 + druid.client.https.trustStoreType=pkcs12 + druid.emitter=prometheus + druid.emitter.prometheus.namespace=druid + druid.emitter.prometheus.port=9090 + druid.emitter.prometheus.strategy=exporter + druid.enablePlaintextPort=false + druid.enableTlsPort=true + druid.extensions.loadList=["druid-basic-security",\ "druid-datasketches",\ "druid-hdfs-storage",\ "druid-kafka-indexing-service",\ "druid-opa-authorizer",\ "postgresql-metadata-storage",\ "prometheus-emitter",\ "simple-client-sslcontext"] + druid.indexer.logs.directory=/stackable/var/druid/indexing-logs + druid.metadata.storage.connector.connectURI=jdbc\:postgresql\://druid-postgresql\:5432/druid + druid.metadata.storage.connector.password=${env\:METADATA_DATABASE_PASSWORD} + druid.metadata.storage.connector.user=${env\:METADATA_DATABASE_USERNAME} + druid.metadata.storage.type=postgresql + druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] + druid.processing.tmpDir=/stackable/var/druid/processing + druid.server.https.certAlias=1 + druid.server.https.keyStorePassword=changeit + druid.server.https.keyStorePath=/stackable/tls/keystore.p12 + druid.server.https.keyStoreType=pkcs12 + druid.startup.logging.logProperties=true + druid.storage.bucket= + druid.storage.storageDirectory=/druid + druid.storage.type=hdfs + druid.tlsPort=8282 + druid.zk.service.host=druid-zk-server.__NAMESPACE__.svc.cluster.local\:2282/znode-__ZNODE__ + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid-broker-default -o yaml \ + | yq -o=json '.data' \ + | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid-broker-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + + # Role-group ConfigMap: druid-coordinator-default + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + jvm.config: |- + -server + -Xmx468m + -Xms468m + -XX:+ExitOnOutOfMemoryError + -XX:+UseG1GC + -Djava.security.properties=/stackable/rwconfig/security.properties + -Duser.timezone=UTC + -Dfile.encoding=UTF-8 + -Djava.io.tmpdir=/tmp + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager + -Dlog4j.configurationFile=/stackable/rwconfig/log4j2.properties + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 + -Djavax.net.ssl.trustStorePassword=changeit + -Djavax.net.ssl.trustStoreType=pkcs12 + -Dderby.stream.error.file=/stackable/var/druid/derby.log + log4j2.properties: |- + appenders = FILE, CONSOLE + + appender.CONSOLE.type = Console + appender.CONSOLE.name = CONSOLE + appender.CONSOLE.target = SYSTEM_ERR + appender.CONSOLE.layout.type = PatternLayout + appender.CONSOLE.layout.pattern = %d{ISO8601} %p [%t] %c - %m%n + appender.CONSOLE.filter.threshold.type = ThresholdFilter + appender.CONSOLE.filter.threshold.level = INFO + + appender.FILE.type = RollingFile + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/druid/druid.log4j2.xml + appender.FILE.filePattern = /stackable/log/druid/druid.log4j2.xml.%i + appender.FILE.layout.type = XMLLayout + appender.FILE.policies.type = Policies + appender.FILE.policies.size.type = SizeBasedTriggeringPolicy + appender.FILE.policies.size.size = 5MB + appender.FILE.strategy.type = DefaultRolloverStrategy + appender.FILE.strategy.max = 1 + appender.FILE.filter.threshold.type = ThresholdFilter + appender.FILE.filter.threshold.level = INFO + + + rootLogger.level=INFO + rootLogger.appenderRefs = CONSOLE, FILE + rootLogger.appenderRef.CONSOLE.ref = CONSOLE + rootLogger.appenderRef.FILE.ref = FILE + runtime.properties: | + druid.auth.basic.ssl.trustStorePassword=changeit + druid.auth.basic.ssl.trustStorePath=/stackable/tls/truststore.p12 + druid.auth.basic.ssl.trustStoreType=pkcs12 + druid.client.https.trustStorePassword=changeit + druid.client.https.trustStorePath=/stackable/tls/truststore.p12 + druid.client.https.trustStoreType=pkcs12 + druid.coordinator.asOverlord.enabled=true + druid.coordinator.asOverlord.overlordService=druid/overlord + druid.coordinator.period=PT20S + druid.coordinator.startDelay=PT20S + druid.emitter=prometheus + druid.emitter.prometheus.namespace=druid + druid.emitter.prometheus.port=9090 + druid.emitter.prometheus.strategy=exporter + druid.enablePlaintextPort=false + druid.enableTlsPort=true + druid.extensions.loadList=["druid-basic-security",\ "druid-datasketches",\ "druid-hdfs-storage",\ "druid-kafka-indexing-service",\ "druid-opa-authorizer",\ "postgresql-metadata-storage",\ "prometheus-emitter",\ "simple-client-sslcontext"] + druid.indexer.logs.directory=/stackable/var/druid/indexing-logs + druid.indexer.queue.startDelay=PT20S + druid.indexer.runner.type=remote + druid.indexer.storage.type=metadata + druid.metadata.storage.connector.connectURI=jdbc\:postgresql\://druid-postgresql\:5432/druid + druid.metadata.storage.connector.password=${env\:METADATA_DATABASE_PASSWORD} + druid.metadata.storage.connector.user=${env\:METADATA_DATABASE_USERNAME} + druid.metadata.storage.type=postgresql + druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] + druid.server.https.certAlias=1 + druid.server.https.keyStorePassword=changeit + druid.server.https.keyStorePath=/stackable/tls/keystore.p12 + druid.server.https.keyStoreType=pkcs12 + druid.startup.logging.logProperties=true + druid.storage.bucket= + druid.storage.storageDirectory=/druid + druid.storage.type=hdfs + druid.tlsPort=8281 + druid.zk.service.host=druid-zk-server.__NAMESPACE__.svc.cluster.local\:2282/znode-__ZNODE__ + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid-coordinator-default -o yaml \ + | yq -o=json '.data' \ + | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid-coordinator-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + + # Role-group ConfigMap: druid-historical-default + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + jvm.config: |- + -server + -Xmx900m + -Xms900m + -XX:MaxDirectMemorySize=300m + -XX:+ExitOnOutOfMemoryError + -XX:+UseG1GC + -Djava.security.properties=/stackable/rwconfig/security.properties + -Duser.timezone=UTC + -Dfile.encoding=UTF-8 + -Djava.io.tmpdir=/tmp + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager + -Dlog4j.configurationFile=/stackable/rwconfig/log4j2.properties + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 + -Djavax.net.ssl.trustStorePassword=changeit + -Djavax.net.ssl.trustStoreType=pkcs12 + log4j2.properties: |- + appenders = FILE, CONSOLE + + appender.CONSOLE.type = Console + appender.CONSOLE.name = CONSOLE + appender.CONSOLE.target = SYSTEM_ERR + appender.CONSOLE.layout.type = PatternLayout + appender.CONSOLE.layout.pattern = %d{ISO8601} %p [%t] %c - %m%n + appender.CONSOLE.filter.threshold.type = ThresholdFilter + appender.CONSOLE.filter.threshold.level = INFO + + appender.FILE.type = RollingFile + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/druid/druid.log4j2.xml + appender.FILE.filePattern = /stackable/log/druid/druid.log4j2.xml.%i + appender.FILE.layout.type = XMLLayout + appender.FILE.policies.type = Policies + appender.FILE.policies.size.type = SizeBasedTriggeringPolicy + appender.FILE.policies.size.size = 5MB + appender.FILE.strategy.type = DefaultRolloverStrategy + appender.FILE.strategy.max = 1 + appender.FILE.filter.threshold.type = ThresholdFilter + appender.FILE.filter.threshold.level = INFO + + + rootLogger.level=INFO + rootLogger.appenderRefs = CONSOLE, FILE + rootLogger.appenderRef.CONSOLE.ref = CONSOLE + rootLogger.appenderRef.FILE.ref = FILE + runtime.properties: | + druid.auth.basic.ssl.trustStorePassword=changeit + druid.auth.basic.ssl.trustStorePath=/stackable/tls/truststore.p12 + druid.auth.basic.ssl.trustStoreType=pkcs12 + druid.client.https.trustStorePassword=changeit + druid.client.https.trustStorePath=/stackable/tls/truststore.p12 + druid.client.https.trustStoreType=pkcs12 + druid.emitter=prometheus + druid.emitter.prometheus.namespace=druid + druid.emitter.prometheus.port=9090 + druid.emitter.prometheus.strategy=exporter + druid.enablePlaintextPort=false + druid.enableTlsPort=true + druid.extensions.loadList=["druid-basic-security",\ "druid-datasketches",\ "druid-hdfs-storage",\ "druid-kafka-indexing-service",\ "druid-opa-authorizer",\ "postgresql-metadata-storage",\ "prometheus-emitter",\ "simple-client-sslcontext"] + druid.historical.cache.populateCache=true + druid.historical.cache.useCache=true + druid.indexer.logs.directory=/stackable/var/druid/indexing-logs + druid.metadata.storage.connector.connectURI=jdbc\:postgresql\://druid-postgresql\:5432/druid + druid.metadata.storage.connector.password=${env\:METADATA_DATABASE_PASSWORD} + druid.metadata.storage.connector.user=${env\:METADATA_DATABASE_USERNAME} + druid.metadata.storage.type=postgresql + druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] + druid.processing.buffer.sizeBytes=76800Ki + druid.processing.numMergeBuffers=2 + druid.processing.numThreads=1 + druid.processing.tmpDir=/stackable/var/druid/processing + druid.segmentCache.locations=[{"path"\:"/stackable/var/druid/segment-cache","maxSize"\:"1G","freeSpacePercent"\:"5"}] + druid.server.https.certAlias=1 + druid.server.https.keyStorePassword=changeit + druid.server.https.keyStorePath=/stackable/tls/keystore.p12 + druid.server.https.keyStoreType=pkcs12 + druid.startup.logging.logProperties=true + druid.storage.bucket= + druid.storage.storageDirectory=/druid + druid.storage.type=hdfs + druid.tlsPort=8283 + druid.zk.service.host=druid-zk-server.__NAMESPACE__.svc.cluster.local\:2282/znode-__ZNODE__ + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid-historical-default -o yaml \ + | yq -o=json '.data' \ + | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid-historical-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + + # Role-group ConfigMap: druid-middlemanager-default + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + jvm.config: |- + -server + -Xmx1200m + -Xms1200m + -XX:+ExitOnOutOfMemoryError + -XX:+UseG1GC + -Djava.security.properties=/stackable/rwconfig/security.properties + -Duser.timezone=UTC + -Dfile.encoding=UTF-8 + -Djava.io.tmpdir=/tmp + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager + -Dlog4j.configurationFile=/stackable/rwconfig/log4j2.properties + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 + -Djavax.net.ssl.trustStorePassword=changeit + -Djavax.net.ssl.trustStoreType=pkcs12 + log4j2.properties: |- + appenders = FILE, CONSOLE + + appender.CONSOLE.type = Console + appender.CONSOLE.name = CONSOLE + appender.CONSOLE.target = SYSTEM_ERR + appender.CONSOLE.layout.type = PatternLayout + appender.CONSOLE.layout.pattern = %d{ISO8601} %p [%t] %c - %m%n + appender.CONSOLE.filter.threshold.type = ThresholdFilter + appender.CONSOLE.filter.threshold.level = INFO + + appender.FILE.type = RollingFile + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/druid/druid.log4j2.xml + appender.FILE.filePattern = /stackable/log/druid/druid.log4j2.xml.%i + appender.FILE.layout.type = XMLLayout + appender.FILE.policies.type = Policies + appender.FILE.policies.size.type = SizeBasedTriggeringPolicy + appender.FILE.policies.size.size = 5MB + appender.FILE.strategy.type = DefaultRolloverStrategy + appender.FILE.strategy.max = 1 + appender.FILE.filter.threshold.type = ThresholdFilter + appender.FILE.filter.threshold.level = INFO + + + rootLogger.level=INFO + rootLogger.appenderRefs = CONSOLE, FILE + rootLogger.appenderRef.CONSOLE.ref = CONSOLE + rootLogger.appenderRef.FILE.ref = FILE + runtime.properties: | + druid.auth.basic.ssl.trustStorePassword=changeit + druid.auth.basic.ssl.trustStorePath=/stackable/tls/truststore.p12 + druid.auth.basic.ssl.trustStoreType=pkcs12 + druid.client.https.trustStorePassword=changeit + druid.client.https.trustStorePath=/stackable/tls/truststore.p12 + druid.client.https.trustStoreType=pkcs12 + druid.emitter=prometheus + druid.emitter.prometheus.namespace=druid + druid.emitter.prometheus.port=9090 + druid.emitter.prometheus.strategy=exporter + druid.enablePlaintextPort=false + druid.enableTlsPort=true + druid.extensions.loadList=["druid-basic-security",\ "druid-datasketches",\ "druid-hdfs-storage",\ "druid-kafka-indexing-service",\ "druid-opa-authorizer",\ "postgresql-metadata-storage",\ "prometheus-emitter",\ "simple-client-sslcontext"] + druid.indexer.logs.directory=/stackable/var/druid/indexing-logs + druid.indexer.runner.javaOpts=-server\ -Xms256m\ -Xmx256m\ -XX\:MaxDirectMemorySize\=300m\ -Duser.timezone\=UTC\ -Dfile.encoding\=UTF-8\ -XX\:+ExitOnOutOfMemoryError\ -Djava.util.logging.manager\=org.apache.logging.log4j.jul.LogManager + druid.indexer.runner.javaOptsArray=["-Djavax.net.ssl.trustStore\=/stackable/truststore.p12",\ "-Djavax.net.ssl.trustStorePassword\=changeit",\ "-Djavax.net.ssl.trustStoreType\=pkcs12"] + druid.indexer.task.baseTaskDir=/stackable/var/druid/task + druid.indexer.task.hadoopWorkingPath=/stackable/var/druid/hadoop-tmp + druid.metadata.storage.connector.connectURI=jdbc\:postgresql\://druid-postgresql\:5432/druid + druid.metadata.storage.connector.password=${env\:METADATA_DATABASE_PASSWORD} + druid.metadata.storage.connector.user=${env\:METADATA_DATABASE_USERNAME} + druid.metadata.storage.type=postgresql + druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] + druid.server.https.certAlias=1 + druid.server.https.keyStorePassword=changeit + druid.server.https.keyStorePath=/stackable/tls/keystore.p12 + druid.server.https.keyStoreType=pkcs12 + druid.startup.logging.logProperties=true + druid.storage.bucket= + druid.storage.storageDirectory=/druid + druid.storage.type=hdfs + druid.tlsPort=8291 + druid.zk.service.host=druid-zk-server.__NAMESPACE__.svc.cluster.local\:2282/znode-__ZNODE__ + security.properties: | + druid.indexer.runner.javaOptsArray=["-Djavax.net.ssl.trustStore\=/stackable/truststore.p12",\ "-Djavax.net.ssl.trustStorePassword\=changeit",\ "-Djavax.net.ssl.trustStoreType\=pkcs12"] + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid-middlemanager-default -o yaml \ + | yq -o=json '.data' \ + | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid-middlemanager-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi + + # Role-group ConfigMap: druid-router-default + - script: | + expected=$(cat <<'YAMLEOF' | sed "s|__NAMESPACE__|$NAMESPACE|g" | yq -o=json + jvm.config: |- + -server + -Xmx84m + -Xms84m + -XX:MaxDirectMemorySize=128m + -XX:+ExitOnOutOfMemoryError + -XX:+UseG1GC + -Djava.security.properties=/stackable/rwconfig/security.properties + -Duser.timezone=UTC + -Dfile.encoding=UTF-8 + -Djava.io.tmpdir=/tmp + -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager + -Dlog4j.configurationFile=/stackable/rwconfig/log4j2.properties + -Djavax.net.ssl.trustStore=/stackable/truststore.p12 + -Djavax.net.ssl.trustStorePassword=changeit + -Djavax.net.ssl.trustStoreType=pkcs12 + log4j2.properties: |- + appenders = FILE, CONSOLE + + appender.CONSOLE.type = Console + appender.CONSOLE.name = CONSOLE + appender.CONSOLE.target = SYSTEM_ERR + appender.CONSOLE.layout.type = PatternLayout + appender.CONSOLE.layout.pattern = %d{ISO8601} %p [%t] %c - %m%n + appender.CONSOLE.filter.threshold.type = ThresholdFilter + appender.CONSOLE.filter.threshold.level = INFO + + appender.FILE.type = RollingFile + appender.FILE.name = FILE + appender.FILE.fileName = /stackable/log/druid/druid.log4j2.xml + appender.FILE.filePattern = /stackable/log/druid/druid.log4j2.xml.%i + appender.FILE.layout.type = XMLLayout + appender.FILE.policies.type = Policies + appender.FILE.policies.size.type = SizeBasedTriggeringPolicy + appender.FILE.policies.size.size = 5MB + appender.FILE.strategy.type = DefaultRolloverStrategy + appender.FILE.strategy.max = 1 + appender.FILE.filter.threshold.type = ThresholdFilter + appender.FILE.filter.threshold.level = INFO + + + rootLogger.level=INFO + rootLogger.appenderRefs = CONSOLE, FILE + rootLogger.appenderRef.CONSOLE.ref = CONSOLE + rootLogger.appenderRef.FILE.ref = FILE + runtime.properties: | + druid.auth.basic.ssl.trustStorePassword=changeit + druid.auth.basic.ssl.trustStorePath=/stackable/tls/truststore.p12 + druid.auth.basic.ssl.trustStoreType=pkcs12 + druid.client.https.trustStorePassword=changeit + druid.client.https.trustStorePath=/stackable/tls/truststore.p12 + druid.client.https.trustStoreType=pkcs12 + druid.emitter=prometheus + druid.emitter.prometheus.namespace=druid + druid.emitter.prometheus.port=9090 + druid.emitter.prometheus.strategy=exporter + druid.enablePlaintextPort=false + druid.enableTlsPort=true + druid.extensions.loadList=["druid-basic-security",\ "druid-datasketches",\ "druid-hdfs-storage",\ "druid-kafka-indexing-service",\ "druid-opa-authorizer",\ "postgresql-metadata-storage",\ "prometheus-emitter",\ "simple-client-sslcontext"] + druid.indexer.logs.directory=/stackable/var/druid/indexing-logs + druid.metadata.storage.connector.connectURI=jdbc\:postgresql\://druid-postgresql\:5432/druid + druid.metadata.storage.connector.password=${env\:METADATA_DATABASE_PASSWORD} + druid.metadata.storage.connector.user=${env\:METADATA_DATABASE_USERNAME} + druid.metadata.storage.type=postgresql + druid.monitoring.monitors=["org.apache.druid.java.util.metrics.JvmMonitor"] + druid.router.http.numConnections=25 + druid.router.managementProxy.enabled=true + druid.server.https.certAlias=1 + druid.server.https.keyStorePassword=changeit + druid.server.https.keyStorePath=/stackable/tls/keystore.p12 + druid.server.https.keyStoreType=pkcs12 + druid.startup.logging.logProperties=true + druid.storage.bucket= + druid.storage.storageDirectory=/druid + druid.storage.type=hdfs + druid.tlsPort=9088 + druid.zk.service.host=druid-zk-server.__NAMESPACE__.svc.cluster.local\:2282/znode-__ZNODE__ + security.properties: | + networkaddress.cache.negative.ttl=0 + networkaddress.cache.ttl=30 + YAMLEOF + ) + actual=$(kubectl -n $NAMESPACE get cm druid-router-default -o yaml \ + | yq -o=json '.data' \ + | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') + if [ "$expected" != "$actual" ]; then + echo "ERROR: ConfigMap druid-router-default data drifted from snapshot." + echo "=== expected ===" + printf '%s\n' "$expected" + echo "=== actual ===" + printf '%s\n' "$actual" + exit 1 + fi From d2e075ca0fcefc06b73f60447736b5ca464941b8 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 20:03:20 +0200 Subject: [PATCH 11/16] test(smoke): drop unused vector_enabled scaffolding from 52-assert vector_enabled was declared in 52-assert.yaml.j2 as forward scaffolding but has no consumer. Drop it; it can be re-introduced in the commit that actually uses it. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/52-assert.yaml.j2 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/templates/kuttl/smoke/52-assert.yaml.j2 b/tests/templates/kuttl/smoke/52-assert.yaml.j2 index 0148c335..52e24ac3 100644 --- a/tests/templates/kuttl/smoke/52-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/52-assert.yaml.j2 @@ -1,7 +1,3 @@ -{# vector_enabled is consumed by 53-assert.yaml.j2 conditionals; declared here - too so future container-level (vector sidecar) assertions can use it without - adding the lookup again. #} -{% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} --- # Declarative shape assertions for every druid-managed resource in the smoke # test except ConfigMap *.data* (covered in 53-assert.yaml.j2) and the From e66ee6e06f93542b0e28b9db1f77d4a6a42024c6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 20:04:02 +0200 Subject: [PATCH 12/16] test(smoke): cover all five roles in 51-assert containerdebug check The previous check listed druid-router-default-0 twice and never checked druid-broker-default-0. Replace the duplicate with the broker entry and sort the list by role name for readability. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/51-assert.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/templates/kuttl/smoke/51-assert.yaml b/tests/templates/kuttl/smoke/51-assert.yaml index 9d9898af..9fce26f5 100644 --- a/tests/templates/kuttl/smoke/51-assert.yaml +++ b/tests/templates/kuttl/smoke/51-assert.yaml @@ -4,8 +4,8 @@ apiVersion: kuttl.dev/v1beta1 kind: TestAssert timeout: 600 commands: + - script: kubectl exec -n $NAMESPACE --container druid druid-broker-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' - script: kubectl exec -n $NAMESPACE --container druid druid-coordinator-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' - - script: kubectl exec -n $NAMESPACE --container druid druid-router-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' + - script: kubectl exec -n $NAMESPACE --container druid druid-historical-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' - script: kubectl exec -n $NAMESPACE --container druid druid-middlemanager-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' - script: kubectl exec -n $NAMESPACE --container druid druid-router-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' - - script: kubectl exec -n $NAMESPACE --container druid druid-historical-default-0 -- cat /stackable/log/containerdebug-state.json | jq --exit-status '"valid JSON"' From 282b0e5c3c43f9209e195c3727e7f638495afd2d Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 20:18:17 +0200 Subject: [PATCH 13/16] test(smoke): add vector_enabled branch to role-group cm snapshot When VECTOR_AGGREGATOR is set, the operator adds a vector.yaml key to each role-group ConfigMap; the other four keys are unchanged. The vector.yaml payload is the same across all five roles except for a single `.role = ""` line, so the content is declared once via a jinja2 `{% set %}` block and substituted per role inside the heredoc. Verified against a live VECTOR_AGGREGATOR=vector-aggregator-discovery cluster in namespace kuttl-test-distinct-camel. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/53-assert.yaml.j2 | 620 ++++++++++++++++++ 1 file changed, 620 insertions(+) diff --git a/tests/templates/kuttl/smoke/53-assert.yaml.j2 b/tests/templates/kuttl/smoke/53-assert.yaml.j2 index 044e81b7..5a2885ac 100644 --- a/tests/templates/kuttl/smoke/53-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/53-assert.yaml.j2 @@ -1,3 +1,603 @@ +{% set vector_enabled = lookup('env', 'VECTOR_AGGREGATOR') | length > 0 %} +{% set vector_yaml -%} +data_dir: /stackable/log/_vector-state + +log_schema: + host_key: pod + +sources: + vector: + type: internal_logs + + files_stdout: + type: file + include: + - /stackable/log/*/*.stdout.log + + files_stderr: + type: file + include: + - /stackable/log/*/*.stderr.log + + files_log4j: + type: file + include: + - /stackable/log/*/*.log4j.xml + line_delimiter: "\r\n" + multiline: + mode: halt_before + start_pattern: ^" + raw_message + "" + parsed_event, err = parse_xml(wrapped_xml_event) + if err != null { + error = "XML not parsable: " + err + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + root = object!(parsed_event.root) + if !is_object(root.event) { + error = "Parsed event contains no \"event\" tag." + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + if keys(root) != ["event"] { + .errors = push(.errors, "Parsed event contains multiple tags: " + join!(keys(root), ", ")) + } + event = object!(root.event) + + epoch_milliseconds, err = to_int(event.@timestamp) + if err == null && epoch_milliseconds != 0 { + converted_timestamp, err = from_unix_timestamp(epoch_milliseconds, "milliseconds") + if err == null { + .timestamp = converted_timestamp + } else { + .errors = push(.errors, "Time not parsable, using current time instead: " + err) + } + } else { + .errors = push(.errors, "Timestamp not found, using current time instead.") + } + + .logger, err = string(event.@logger) + if err != null || is_empty(.logger) { + .errors = push(.errors, "Logger not found.") + } + + level, err = string(event.@level) + if err != null { + .errors = push(.errors, "Level not found, using \"" + .level + "\" instead.") + } else if !includes(["TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"], level) { + .errors = push(.errors, "Level \"" + level + "\" unknown, using \"" + .level + "\" instead.") + } else { + .level = level + } + + message, err = string(event.message) + if err != null || is_empty(message) { + .errors = push(.errors, "Message not found.") + } + throwable = string(event.throwable) ?? "" + .message = join!(compact([message, throwable]), "\n") + } + } + + processed_files_log4j2: + inputs: + - files_log4j2 + type: remap + source: | + raw_message = string!(.message) + + .timestamp = now() + .logger = "" + .level = "INFO" + .message = "" + .errors = [] + + event = {} + parsed_event, err = parse_xml(raw_message) + if err != null { + error = "XML not parsable: " + err + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + if !is_object(parsed_event.Event) { + error = "Parsed event contains no \"Event\" tag." + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + event = object!(parsed_event.Event) + + tag_instant_valid = false + instant, err = object(event.Instant) + if err == null { + epoch_nanoseconds, err = to_int(instant.@epochSecond) * 1_000_000_000 + to_int(instant.@nanoOfSecond) + if err == null && epoch_nanoseconds != 0 { + converted_timestamp, err = from_unix_timestamp(epoch_nanoseconds, "nanoseconds") + if err == null { + .timestamp = converted_timestamp + tag_instant_valid = true + } else { + .errors = push(.errors, "Instant invalid, trying property timeMillis instead: " + err) + } + } else { + .errors = push(.errors, "Instant invalid, trying property timeMillis instead: " + err) + } + } + if !tag_instant_valid { + epoch_milliseconds, err = to_int(event.@timeMillis) + if err == null && epoch_milliseconds != 0 { + converted_timestamp, err = from_unix_timestamp(epoch_milliseconds, "milliseconds") + if err == null { + .timestamp = converted_timestamp + } else { + .errors = push(.errors, "timeMillis not parsable, using current time instead: " + err) + } + } else { + .errors = push(.errors, "timeMillis not parsable, using current time instead: " + err) + } + } + + .logger, err = string(event.@loggerName) + if err != null || is_empty(.logger) { + .errors = push(.errors, "Logger not found.") + } + + level, err = string(event.@level) + if err != null { + .errors = push(.errors, "Level not found, using \"" + .level + "\" instead.") + } else if !includes(["TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"], level) { + .errors = push(.errors, "Level \"" + level + "\" unknown, using \"" + .level + "\" instead.") + } else { + .level = level + } + + exception = null + thrown = event.Thrown + if is_object(thrown) { + exception = "Exception" + thread, err = string(event.@thread) + if err == null && !is_empty(thread) { + exception = exception + " in thread \"" + thread + "\"" + } + thrown_name, err = string(thrown.@name) + if err == null && !is_empty(exception) { + exception = exception + " " + thrown_name + } + message = string(thrown.@localizedMessage) ?? + string(thrown.@message) ?? + "" + if !is_empty(message) { + exception = exception + ": " + message + } + stacktrace_items = array(thrown.ExtendedStackTrace.ExtendedStackTraceItem) ?? [] + stacktrace = "" + for_each(stacktrace_items) -> |_index, value| { + stacktrace = stacktrace + " " + class = string(value.@class) ?? "" + method = string(value.@method) ?? "" + if !is_empty(class) && !is_empty(method) { + stacktrace = stacktrace + "at " + class + "." + method + } + file = string(value.@file) ?? "" + line = string(value.@line) ?? "" + if !is_empty(file) && !is_empty(line) { + stacktrace = stacktrace + "(" + file + ":" + line + ")" + } + exact = to_bool(value.@exact) ?? false + location = string(value.@location) ?? "" + version = string(value.@version) ?? "" + if !is_empty(location) && !is_empty(version) { + stacktrace = stacktrace + " " + if !exact { + stacktrace = stacktrace + "~" + } + stacktrace = stacktrace + "[" + location + ":" + version + "]" + } + stacktrace = stacktrace + "\n" + } + if stacktrace != "" { + exception = exception + "\n" + stacktrace + } + } + + message, err = string(event.Message) + if err != null || is_empty(message) { + message = null + .errors = push(.errors, "Message not found.") + } + .message = join!(compact([message, exception]), "\n") + } + } + + processed_files_py: + inputs: + - files_py + type: remap + source: | + raw_message = string!(.message) + + .timestamp = now() + .logger = "" + .level = "INFO" + .message = "" + .errors = [] + + parsed_event, err = parse_json(raw_message) + if err != null { + error = "JSON not parsable: " + err + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else if !is_object(parsed_event) { + error = "Parsed event is not a JSON object." + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + event = object!(parsed_event) + + asctime, err = string(event.asctime) + if err == null { + parsed_timestamp, err = parse_timestamp(asctime, "%F %T,%3f") + if err == null { + .timestamp = parsed_timestamp + } else { + .errors = push(.errors, "Timestamp not parsable, using current time instead: "+ err) + } + } else { + .errors = push(.errors, "Timestamp not found, using current time instead.") + } + + .logger, err = string(event.name) + if err != null || is_empty(.logger) { + .errors = push(.errors, "Logger not found.") + } + + level, err = string(event.levelname) + if err != null { + .errors = push(.errors, "Level not found, using \"" + .level + "\" instead.") + } else if level == "DEBUG" { + .level = "DEBUG" + } else if level == "INFO" { + .level = "INFO" + } else if level == "WARNING" { + .level = "WARN" + } else if level == "ERROR" { + .level = "ERROR" + } else if level == "CRITICAL" { + .level = "FATAL" + } else { + .errors = push(.errors, "Level \"" + level + "\" unknown, using \"" + .level + "\" instead.") + } + + .message, err = string(event.message) + if err != null || is_empty(.message) { + .errors = push(.errors, "Message not found.") + } + } + + processed_files_airlift: + inputs: + - files_airlift + type: remap + source: | + raw_message = string!(.message) + + .timestamp = now() + .logger = "" + .level = "INFO" + .message = "" + .errors = [] + + parsed_event, err = parse_json(raw_message) + if err != null { + error = "JSON not parsable: " + err + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else if !is_object(parsed_event) { + error = "Parsed event is not a JSON object." + .errors = push(.errors, error) + log(error, level: "warn") + .message = raw_message + } else { + event = object!(parsed_event) + + timestamp_string, err = string(event.timestamp) + if err == null { + parsed_timestamp, err = parse_timestamp(timestamp_string, "%Y-%m-%dT%H:%M:%S.%fZ") + if err == null { + .timestamp = parsed_timestamp + } else { + .errors = push(.errors, "Timestamp not parsable, using current time instead: " + err) + } + } else { + .errors = push(.errors, "Timestamp not found, using current time instead.") + } + + .logger, err = string(event.logger) + if err != null || is_empty(.logger) { + .errors = push(.errors, "Logger not found.") + } + + level, err = string(event.level) + if err != null { + .errors = push(.errors, "Level not found, using \"" + .level + "\" instead.") + } else if !includes(["TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"], level) { + .errors = push(.errors, "Level \"" + level + "\" unknown, using \"" + .level + "\" instead.") + } else { + .level = level + } + + .thread = string(parsed_event.thread) ?? null + + .message, err = string(event.message) + if err != null || is_empty(.message) { + .errors = push(.errors, "Message not found.") + } + stacktrace = string(event.stackTrace) ?? "" + .message = join!(compact([.message, stacktrace]), "\n\n") + } + + extended_logs_files: + inputs: + - processed_files_* + type: remap + source: | + del(.source_type) + if .errors == [] { + del(.errors) + } + . |= parse_regex!(.file, r'^/stackable/log/(?P.*?)/(?P.*?)$') + + filtered_logs_vector: + inputs: + - vector + type: filter + condition: '!includes(["TRACE", "DEBUG"], .metadata.level)' + + extended_logs_vector: + inputs: + - filtered_logs_vector + type: remap + source: | + .container = "vector" + .level = .metadata.level + .logger = .metadata.module_path + if exists(.file) { .processed_file = del(.file) } + del(.metadata) + del(.pid) + del(.source_type) + + extended_logs: + inputs: + - extended_logs_* + type: remap + source: | + .namespace = "__NAMESPACE__" + .cluster = "druid" + .role = "__ROLE__" + .roleGroup = "default" + +sinks: + aggregator: + inputs: + - extended_logs + type: vector + address: $VECTOR_AGGREGATOR_ADDRESS +{% endset %} --- # Snapshot the full `.data` of each operator-managed ConfigMap. # Any code change that alters rendered config values will fail these diffs. @@ -124,6 +724,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 + {% if vector_enabled -%} + vector.yaml: | +{{ vector_yaml | replace("__ROLE__", "broker") | indent(8, true) }} + {%- endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-broker-default -o yaml \ @@ -226,6 +830,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 + {% if vector_enabled -%} + vector.yaml: | +{{ vector_yaml | replace("__ROLE__", "coordinator") | indent(8, true) }} + {%- endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-coordinator-default -o yaml \ @@ -328,6 +936,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 + {% if vector_enabled -%} + vector.yaml: | +{{ vector_yaml | replace("__ROLE__", "historical") | indent(8, true) }} + {%- endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-historical-default -o yaml \ @@ -427,6 +1039,10 @@ commands: druid.indexer.runner.javaOptsArray=["-Djavax.net.ssl.trustStore\=/stackable/truststore.p12",\ "-Djavax.net.ssl.trustStorePassword\=changeit",\ "-Djavax.net.ssl.trustStoreType\=pkcs12"] networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 + {% if vector_enabled -%} + vector.yaml: | +{{ vector_yaml | replace("__ROLE__", "middlemanager") | indent(8, true) }} + {%- endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-middlemanager-default -o yaml \ @@ -524,6 +1140,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 + {% if vector_enabled -%} + vector.yaml: | +{{ vector_yaml | replace("__ROLE__", "router") | indent(8, true) }} + {%- endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-router-default -o yaml \ From e22233d66d11e77ac35a93ac0d25892b019dbfa6 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 21 May 2026 20:36:47 +0200 Subject: [PATCH 14/16] fix(tests): place vector_enabled conditional at column 0 The `{% if vector_enabled -%}` and `{%- endif %}` blocks had 6 leading spaces. When vector_enabled was false, jinja2's strip markers joined the leading whitespace of both blocks, putting YAMLEOF at 12 spaces in the rendered output. After the kuttl YAML block scalar strips 6 common-leading spaces, YAMLEOF ended up at 6 spaces inside the script body, but the heredoc opener is `<<'YAMLEOF'` (not `<<-`), which requires the terminator at column 0. The heredoc therefore never closed and `\$(...)` never terminated, producing `Syntax error: end of file unexpected (expecting ")")` from dash. Moving `{% if %}` and `{% endif %}` to column 0 (no strip markers) mirrors kafka-operator's 34-assert.yaml.j2 pattern and renders both branches with YAMLEOF at the correct indent. Verified: - non-vector render: 8 scripts pass against kuttl-test-fluent-mastodon - vector render heredoc parses to the 5 expected keys (jvm.config, log4j2.properties, runtime.properties, security.properties, vector.yaml) Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/templates/kuttl/smoke/53-assert.yaml.j2 | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/templates/kuttl/smoke/53-assert.yaml.j2 b/tests/templates/kuttl/smoke/53-assert.yaml.j2 index 5a2885ac..30319f6d 100644 --- a/tests/templates/kuttl/smoke/53-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/53-assert.yaml.j2 @@ -724,10 +724,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 - {% if vector_enabled -%} +{% if vector_enabled %} vector.yaml: | {{ vector_yaml | replace("__ROLE__", "broker") | indent(8, true) }} - {%- endif %} +{% endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-broker-default -o yaml \ @@ -830,10 +830,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 - {% if vector_enabled -%} +{% if vector_enabled %} vector.yaml: | {{ vector_yaml | replace("__ROLE__", "coordinator") | indent(8, true) }} - {%- endif %} +{% endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-coordinator-default -o yaml \ @@ -936,10 +936,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 - {% if vector_enabled -%} +{% if vector_enabled %} vector.yaml: | {{ vector_yaml | replace("__ROLE__", "historical") | indent(8, true) }} - {%- endif %} +{% endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-historical-default -o yaml \ @@ -1039,10 +1039,10 @@ commands: druid.indexer.runner.javaOptsArray=["-Djavax.net.ssl.trustStore\=/stackable/truststore.p12",\ "-Djavax.net.ssl.trustStorePassword\=changeit",\ "-Djavax.net.ssl.trustStoreType\=pkcs12"] networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 - {% if vector_enabled -%} +{% if vector_enabled %} vector.yaml: | {{ vector_yaml | replace("__ROLE__", "middlemanager") | indent(8, true) }} - {%- endif %} +{% endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-middlemanager-default -o yaml \ @@ -1140,10 +1140,10 @@ commands: security.properties: | networkaddress.cache.negative.ttl=0 networkaddress.cache.ttl=30 - {% if vector_enabled -%} +{% if vector_enabled %} vector.yaml: | {{ vector_yaml | replace("__ROLE__", "router") | indent(8, true) }} - {%- endif %} +{% endif %} YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid-router-default -o yaml \ From 7719cb948ecadc27394f765e54dc1a8106346376 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 22 May 2026 12:13:04 +0200 Subject: [PATCH 15/16] docs: adapt changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28cde348..e24cedb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ All notable changes to this project will be documented in this file. The `.clusterConfig.metadataStorageDatabase` has subfields according to the supported db types: `postgresql`, `mysql` and `derby`. - BREAKING: The `.clusterConfig.metadataStorageDatabase` field has been renamed to `.clusterConfig.metadataDatabase` for consistency ([#814]). - Document Helm deployed RBAC permissions and remove unnecessary permissions ([#810]). +- Internal operator refactoring: introduce dereference() and validate() steps in the reconciler ([#824]). ### Deleted @@ -25,6 +26,7 @@ All notable changes to this project will be documented in this file. [#813]: https://github.com/stackabletech/druid-operator/pull/813 [#814]: https://github.com/stackabletech/druid-operator/pull/814 [#818]: https://github.com/stackabletech/druid-operator/pull/818 +[#824]: https://github.com/stackabletech/druid-operator/pull/824 ## [26.3.0] - 2026-03-16 From 3f5b828b0812018fa4e1c8845da3d92bc6596e88 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Fri, 22 May 2026 13:20:57 +0200 Subject: [PATCH 16/16] fix(test): make configmap diff better readable --- tests/templates/kuttl/smoke/53-assert.yaml.j2 | 72 +++++++++++-------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/tests/templates/kuttl/smoke/53-assert.yaml.j2 b/tests/templates/kuttl/smoke/53-assert.yaml.j2 index 30319f6d..53c7b9f8 100644 --- a/tests/templates/kuttl/smoke/53-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/53-assert.yaml.j2 @@ -633,14 +633,16 @@ commands: YAMLEOF ) actual=$(kubectl -n $NAMESPACE get cm druid -o yaml | yq -o=json '.data') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file" # Role-group ConfigMap: druid-broker-default - script: | @@ -733,14 +735,16 @@ commands: actual=$(kubectl -n $NAMESPACE get cm druid-broker-default -o yaml \ | yq -o=json '.data' \ | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid-broker-default data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file" # Role-group ConfigMap: druid-coordinator-default - script: | @@ -839,14 +843,16 @@ commands: actual=$(kubectl -n $NAMESPACE get cm druid-coordinator-default -o yaml \ | yq -o=json '.data' \ | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid-coordinator-default data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file" # Role-group ConfigMap: druid-historical-default - script: | @@ -945,14 +951,16 @@ commands: actual=$(kubectl -n $NAMESPACE get cm druid-historical-default -o yaml \ | yq -o=json '.data' \ | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid-historical-default data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file" # Role-group ConfigMap: druid-middlemanager-default - script: | @@ -1048,14 +1056,16 @@ commands: actual=$(kubectl -n $NAMESPACE get cm druid-middlemanager-default -o yaml \ | yq -o=json '.data' \ | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid-middlemanager-default data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file" # Role-group ConfigMap: druid-router-default - script: | @@ -1149,11 +1159,13 @@ commands: actual=$(kubectl -n $NAMESPACE get cm druid-router-default -o yaml \ | yq -o=json '.data' \ | sed -E 's|/znode-[a-f0-9-]+|/znode-__ZNODE__|') - if [ "$expected" != "$actual" ]; then + expected_file=$(mktemp) && actual_file=$(mktemp) + printf '%s\n' "$expected" > "$expected_file" + printf '%s\n' "$actual" > "$actual_file" + if ! diff_out=$(diff -u "$expected_file" "$actual_file"); then echo "ERROR: ConfigMap druid-router-default data drifted from snapshot." - echo "=== expected ===" - printf '%s\n' "$expected" - echo "=== actual ===" - printf '%s\n' "$actual" + printf '%s\n' "$diff_out" + rm -f "$expected_file" "$actual_file" exit 1 fi + rm -f "$expected_file" "$actual_file"