Skip to content

Commit 0eca54d

Browse files
authored
normalize column references (#374)
1 parent 6f7a409 commit 0eca54d

2 files changed

Lines changed: 279 additions & 0 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ and writer errors now report user-facing aesthetic names (`x`, `y`, `panel`,
2323
`row`, …) instead of internal forms (`pos1`, `pos2`, `facet1`, …), translated
2424
based on the active coordinate system and facet layout (#388).
2525
- Fixed opacity calculation in point layers with Vega-Lite (#393)
26+
- Fixed an issue with case-sensitive column references in mappings (#374)
2627

2728
### Changed
2829

src/execute/mod.rs

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,93 @@ fn validate(
150150
Ok(())
151151
}
152152

153+
// =============================================================================
154+
// Column Name Normalization
155+
// =============================================================================
156+
157+
/// Rewrite `name` to match the schema's casing (case-insensitive resolution).
///
/// SQL treats unquoted identifiers as case-insensitive, so users may write
/// `VISUALISE CATEGORY AS x` even when DuckDB returns the column as `category`.
/// ggsql's own validator and generated SQL treat column names case-sensitively,
/// so we reconcile by rewriting the user-written name to the schema's casing
/// before either runs.
///
/// Resolution rules:
/// * an exact (case-sensitive) match wins and `name` is untouched;
/// * exactly one case-insensitive match rewrites `name` to that casing;
/// * zero or several case-insensitive matches leave `name` alone, so the
///   existing validator can report the problem with its normal error.
fn normalize_column_ref(name: &mut String, schema_names: &[&str]) {
    // Exact hit: the user already wrote the schema's casing.
    if schema_names.iter().any(|s| *s == name.as_str()) {
        return;
    }
    let folded = name.to_lowercase();
    let mut candidates = schema_names
        .iter()
        .filter(|s| s.to_lowercase() == folded);
    match (candidates.next(), candidates.next()) {
        // Unique case-insensitive hit: adopt the schema's casing.
        (Some(hit), None) => *name = (*hit).to_string(),
        // No hit, or ambiguous (e.g. schema has both "Foo" and "foo"):
        // leave the reference as written for the validator to diagnose.
        _ => {}
    }
}
183+
184+
/// Normalize all user-written column references in `specs` against their layer
185+
/// schemas.
186+
///
187+
/// Runs after `merge_global_mappings_into_layers` so every aesthetic that a
188+
/// layer will consult is already attached to that layer's `mappings`; each
189+
/// layer can then be normalized against its own schema. This matters for
190+
/// multi-source layers (e.g. `MAPPING ... FROM temps` vs `... FROM ozone`),
191+
/// where the schemas — and the column casings — can legitimately differ.
192+
///
193+
/// Covers aesthetic `Column` values and `partition_by` per layer, plus
194+
/// user-written facet variables on the plot-level `FACET` clause.
195+
fn normalize_column_references(specs: &mut [Plot], layer_schemas: &[Schema]) {
196+
for spec in specs {
197+
for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) {
198+
if matches!(layer.source, Some(DataSource::Annotation)) {
199+
continue;
200+
}
201+
let names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
202+
for value in layer.mappings.aesthetics.values_mut() {
203+
if let AestheticValue::Column { name, .. } = value {
204+
normalize_column_ref(name, &names);
205+
}
206+
}
207+
for col in &mut layer.partition_by {
208+
normalize_column_ref(col, &names);
209+
}
210+
}
211+
212+
// Facet variables are plot-level. Normalize against the first layer
213+
// whose schema contains the variable (case-insensitively). If no
214+
// layer matches, leave it — `add_facet_mappings_to_layers` simply
215+
// won't inject a mapping for layers that don't have the column.
216+
if let Some(facet) = spec.facet.as_mut() {
217+
let normalize_var = |var: &mut String| {
218+
for schema in layer_schemas {
219+
let names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
220+
let before = var.clone();
221+
normalize_column_ref(var, &names);
222+
if *var != before || names.contains(&var.as_str()) {
223+
break;
224+
}
225+
}
226+
};
227+
match &mut facet.layout {
228+
crate::plot::FacetLayout::Wrap { variables } => {
229+
variables.iter_mut().for_each(normalize_var);
230+
}
231+
crate::plot::FacetLayout::Grid { row, column } => {
232+
row.iter_mut().for_each(normalize_var);
233+
column.iter_mut().for_each(normalize_var);
234+
}
235+
}
236+
}
237+
}
238+
}
239+
153240
// =============================================================================
154241
// Global Mapping & Color Splitting
155242
// =============================================================================
@@ -1035,6 +1122,11 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
10351122
// because transformation happens in builder.rs right after parsing
10361123
merge_global_mappings_into_layers(&mut specs, &layer_schemas);
10371124

1125+
// Reconcile user-written column casing with schema casing (DuckDB lowercases
1126+
// unquoted identifiers). Must run after the global→layer merge so each layer
1127+
// is normalized against its own schema, which matters for multi-source layers.
1128+
normalize_column_references(&mut specs, &layer_schemas);
1129+
10381130
// Resolve aesthetic aliases (e.g., 'color' → 'fill'/'stroke') early in the pipeline
10391131
// This must happen before validation so concrete aesthetics are validated
10401132
for spec in &mut specs {
@@ -2859,6 +2951,192 @@ mod tests {
28592951
);
28602952
}
28612953

2954+
// =========================================================================
2955+
// Case-insensitive column reference normalization
2956+
// =========================================================================
2957+
2958+
/// Original reproducer for #374: DuckDB lowercases unquoted identifiers, so
/// `SELECT category` returns `category`. `VISUALISE CATEGORY AS x` must
/// resolve to `category` before validation.
#[cfg(feature = "duckdb")]
#[test]
fn test_case_insensitive_visualise_refs() {
    let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
    let setup = "CREATE TABLE case_test AS SELECT 'A' AS category, 10 AS value \
                 UNION ALL SELECT 'B', 20";
    reader.connection().execute(setup, duckdb::params![]).unwrap();

    let query = r#"
        SELECT category, value FROM case_test
        VISUALISE CATEGORY AS x, VALUE AS y
        DRAW bar
    "#;

    // Uppercase refs must be rewritten to the schema's lowercase columns.
    let prepared = prepare_data_with_reader(query, &reader);
    assert!(
        prepared.is_ok(),
        "Uppercase VISUALISE refs should resolve to lowercase schema: {:?}",
        prepared.err()
    );
}
2987+
2988+
/// Mixed-case references (`CaTeGoRy`) must normalize just like uppercase ones.
#[cfg(feature = "duckdb")]
#[test]
fn test_mixed_case_visualise_refs() {
    let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
    let setup = "CREATE TABLE mixed AS SELECT 'A' AS category, 10 AS value \
                 UNION ALL SELECT 'B', 20";
    reader.connection().execute(setup, duckdb::params![]).unwrap();

    let query = r#"
        SELECT category, value FROM mixed
        VISUALISE CaTeGoRy AS x, VaLuE AS y
        DRAW bar
    "#;

    let prepared = prepare_data_with_reader(query, &reader);
    assert!(
        prepared.is_ok(),
        "Mixed-case VISUALISE refs should normalize: {:?}",
        prepared.err()
    );
}
3014+
3015+
/// `PARTITION BY` column references go through the same normalization as
/// aesthetic mappings.
#[cfg(feature = "duckdb")]
#[test]
fn test_case_insensitive_partition_by() {
    let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
    let setup = "CREATE TABLE pb AS SELECT 1 AS x, 10 AS y, 'A' AS category \
                 UNION ALL SELECT 2, 20, 'B'";
    reader.connection().execute(setup, duckdb::params![]).unwrap();

    let query = r#"
        SELECT x, y, category FROM pb
        VISUALISE x AS x, y AS y
        DRAW line PARTITION BY CATEGORY
    "#;

    let prepared = prepare_data_with_reader(query, &reader);
    assert!(
        prepared.is_ok(),
        "Uppercase PARTITION BY should resolve to lowercase schema: {:?}",
        prepared.err()
    );
}
3041+
3042+
/// Plot-level `FACET` variables are normalized against the layer schemas too.
#[cfg(feature = "duckdb")]
#[test]
fn test_case_insensitive_facet() {
    let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
    let setup = "CREATE TABLE facet_case AS SELECT 1 AS x, 10 AS y, 'N' AS region \
                 UNION ALL SELECT 2, 20, 'S'";
    reader.connection().execute(setup, duckdb::params![]).unwrap();

    let query = r#"
        SELECT x, y, region FROM facet_case
        VISUALISE x AS x, y AS y
        DRAW point
        FACET REGION
    "#;

    let prepared = prepare_data_with_reader(query, &reader);
    assert!(
        prepared.is_ok(),
        "Uppercase FACET variable should resolve to lowercase schema: {:?}",
        prepared.err()
    );
}
3069+
3070+
/// Multi-source layers with different schemas — the case the old PR #143
/// got wrong. Each layer's mapping must be normalized against that layer's
/// own schema, not the first layer's.
#[cfg(feature = "duckdb")]
#[test]
fn test_multi_source_layers_case_insensitive() {
    let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
    // Two independent sources whose schemas happen to share column names.
    let run_ddl = |sql: &str| {
        reader.connection().execute(sql, duckdb::params![]).unwrap();
    };
    run_ddl(
        "CREATE TABLE temps AS SELECT 1 AS date, 20.0 AS value \
         UNION ALL SELECT 2, 21.0",
    );
    run_ddl(
        "CREATE TABLE ozone AS SELECT 1 AS date, 0.05 AS value \
         UNION ALL SELECT 2, 0.06",
    );

    let query = r#"
        VISUALISE Date AS x, Value AS y
        DRAW line MAPPING Date AS x, Value AS y FROM temps
        DRAW line MAPPING DATE AS x, VALUE AS y FROM ozone
    "#;

    let prepared = prepare_data_with_reader(query, &reader);
    assert!(
        prepared.is_ok(),
        "Per-layer normalization should work across multi-source layers: {:?}",
        prepared.err()
    );
}
3107+
3108+
// Direct tests for the normalizer's match-resolution contract.
3109+
// (DuckDB itself rejects case-colliding column names even when quoted,
3110+
// so the ambiguous/exact-match scenarios can't be exercised end-to-end.)
3111+
3112+
#[test]
fn test_normalize_column_ref_exact_match_wins() {
    // A case-sensitive hit must win over a case-insensitive rival.
    let mut column = String::from("foo");
    normalize_column_ref(&mut column, &["Foo", "foo"]);
    assert_eq!(column, "foo");
}
3118+
3119+
#[test]
fn test_normalize_column_ref_unique_case_match_rewrites() {
    // A single case-insensitive match adopts the schema's casing.
    let mut column = String::from("CATEGORY");
    normalize_column_ref(&mut column, &["category", "value"]);
    assert_eq!(column, "category");
}
3125+
3126+
#[test]
fn test_normalize_column_ref_ambiguous_left_alone() {
    // Two case-insensitive candidates: the reference must stay untouched.
    let mut column = String::from("FOO");
    normalize_column_ref(&mut column, &["Foo", "foo"]);
    assert_eq!(column, "FOO", "ambiguous match must not be silently resolved");
}
3132+
3133+
#[test]
fn test_normalize_column_ref_no_match_left_alone() {
    // No candidate at all: the reference must stay untouched.
    let mut column = String::from("missing");
    normalize_column_ref(&mut column, &["a", "b"]);
    assert_eq!(column, "missing");
}
3139+
28623140
// =========================================================================
28633141
// validate() — internal aesthetic names must not leak into error messages
28643142
// =========================================================================

0 commit comments

Comments
 (0)