Skip to content

Commit 06f532f

Browse files
authored
Fix VALUES and column aliases in CTEs (#188)
* Fix column aliases in CTE extraction
* Allow for VALUES (...) in CTE
1 parent 2c15821 commit 06f532f

3 files changed

Lines changed: 94 additions & 6 deletions

File tree

src/execute/cte.rs

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ pub struct CteDefinition {
1616
pub name: String,
1717
/// Full SQL text of the CTE body (including the SELECT statement inside)
1818
pub body: String,
19+
/// Optional column aliases: WITH t(value, label) AS (...) → ["value", "label"]
20+
pub column_aliases: Vec<String>,
1921
}
2022

2123
/// Extract CTE definitions from the source tree
@@ -36,17 +38,22 @@ pub fn extract_ctes(source_tree: &SourceTree) -> Vec<CteDefinition> {
3638
/// Parse a single CTE definition node into a CteDefinition
3739
fn parse_cte_definition(node: &Node, source: &str) -> Option<CteDefinition> {
3840
let mut name: Option<String> = None;
41+
let mut column_aliases: Vec<String> = Vec::new();
3942
let mut body_start: Option<usize> = None;
4043
let mut body_end: Option<usize> = None;
4144

4245
let mut cursor = node.walk();
4346
for child in node.children(&mut cursor) {
4447
match child.kind() {
4548
"identifier" => {
46-
name = Some(get_node_text(&child, source).to_string());
49+
// First identifier is the CTE name, subsequent ones are column aliases
50+
if name.is_none() {
51+
name = Some(get_node_text(&child, source).to_string());
52+
} else {
53+
column_aliases.push(get_node_text(&child, source).to_string());
54+
}
4755
}
48-
"select_statement" => {
49-
// The SELECT inside the CTE
56+
"select_statement" | "subquery_body" | "with_statement" => {
5057
body_start = Some(child.start_byte());
5158
body_end = Some(child.end_byte());
5259
}
@@ -57,7 +64,11 @@ fn parse_cte_definition(node: &Node, source: &str) -> Option<CteDefinition> {
5764
match (name, body_start, body_end) {
5865
(Some(n), Some(start), Some(end)) => {
5966
let body = source[start..end].to_string();
60-
Some(CteDefinition { name: n, body })
67+
Some(CteDefinition {
68+
name: n,
69+
body,
70+
column_aliases,
71+
})
6172
}
6273
_ => None,
6374
}
@@ -136,9 +147,27 @@ pub fn materialize_ctes(ctes: &[CteDefinition], reader: &dyn Reader) -> Result<H
136147
let temp_table_name = naming::cte_table(&cte.name);
137148

138149
// Execute the CTE body SQL to get a DataFrame, then register it
139-
let df = reader.execute_sql(&transformed_body).map_err(|e| {
150+
let mut df = reader.execute_sql(&transformed_body).map_err(|e| {
140151
GgsqlError::ReaderError(format!("Failed to materialize CTE '{}': {}", cte.name, e))
141152
})?;
153+
154+
// Apply column aliases if present: WITH t(value, label) AS (...) renames columns
155+
if !cte.column_aliases.is_empty() && cte.column_aliases.len() == df.width() {
156+
let current_names: Vec<String> = df
157+
.get_column_names()
158+
.iter()
159+
.map(|s| s.to_string())
160+
.collect();
161+
for (old, new) in current_names.iter().zip(cte.column_aliases.iter()) {
162+
df.rename(old, new.into()).map_err(|e| {
163+
GgsqlError::ReaderError(format!(
164+
"Failed to apply column alias '{}' for CTE '{}': {}",
165+
new, cte.name, e
166+
))
167+
})?;
168+
}
169+
}
170+
142171
reader.register(&temp_table_name, df, true).map_err(|e| {
143172
GgsqlError::ReaderError(format!("Failed to register CTE '{}': {}", cte.name, e))
144173
})?;
@@ -287,6 +316,28 @@ mod tests {
287316
assert_eq!(ctes[1].name, "targets");
288317
}
289318

319+
#[test]
320+
fn test_extract_ctes_with_column_aliases() {
321+
let sql = "WITH t(value, label) AS (SELECT * FROM (VALUES (70, 'Target'))) SELECT * FROM t";
322+
let source_tree = SourceTree::new(sql).unwrap();
323+
let ctes = extract_ctes(&source_tree);
324+
325+
assert_eq!(ctes.len(), 1);
326+
assert_eq!(ctes[0].name, "t");
327+
assert_eq!(ctes[0].column_aliases, vec!["value", "label"]);
328+
}
329+
330+
#[test]
331+
fn test_extract_ctes_without_column_aliases() {
332+
let sql = "WITH sales AS (SELECT * FROM raw_sales) SELECT * FROM sales";
333+
let source_tree = SourceTree::new(sql).unwrap();
334+
let ctes = extract_ctes(&source_tree);
335+
336+
assert_eq!(ctes.len(), 1);
337+
assert_eq!(ctes[0].name, "sales");
338+
assert!(ctes[0].column_aliases.is_empty());
339+
}
340+
290341
#[test]
291342
fn test_extract_ctes_none() {
292343
let sql = "SELECT * FROM sales WHERE year = 2024";

src/execute/mod.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,36 @@ mod tests {
13661366
assert_eq!(layer1_df.height(), 2);
13671367
}
13681368

1369+
#[cfg(feature = "duckdb")]
1370+
#[test]
1371+
fn test_layer_references_cte_with_column_aliases() {
1372+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1373+
1374+
let query = r#"
1375+
WITH t(value, label) AS (
1376+
SELECT * FROM (VALUES
1377+
(70, 'Target'),
1378+
(80, 'Warning'),
1379+
(90, 'Critical')
1380+
)
1381+
)
1382+
SELECT 1 AS date, 75 AS temperature
1383+
VISUALISE
1384+
DRAW line MAPPING date AS x, temperature AS y
1385+
DRAW rule MAPPING value AS y, label AS colour FROM t
1386+
"#;
1387+
1388+
let result = prepare_data_with_reader(query, &reader).unwrap();
1389+
1390+
// Layer 0: line from global data
1391+
let layer0_df = result.data.get(&naming::layer_key(0)).unwrap();
1392+
assert_eq!(layer0_df.height(), 1);
1393+
1394+
// Layer 1: rule from CTE with column aliases
1395+
let layer1_df = result.data.get(&naming::layer_key(1)).unwrap();
1396+
assert_eq!(layer1_df.height(), 3);
1397+
}
1398+
13691399
#[cfg(feature = "duckdb")]
13701400
#[test]
13711401
fn test_histogram_stat_transform() {

tree-sitter-ggsql/grammar.js

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,18 @@ module.exports = grammar({
8181

8282
cte_definition: $ => seq(
8383
$.identifier,
84+
optional(seq( // Optional column list: df(x, y, id)
85+
'(',
86+
$.identifier,
87+
repeat(seq(',', $.identifier)),
88+
')'
89+
)),
8490
caseInsensitive('AS'),
8591
'(',
8692
choice(
8793
$.with_statement, // Allow nested CTEs
88-
$.select_statement
94+
$.select_statement,
95+
$.subquery_body // VALUES (...) and other non-SELECT bodies
8996
),
9097
')'
9198
),

0 commit comments

Comments
 (0)