Commit 88baccd

Add sqlite reader and adjust SQL queries to work there (#182)
* Add sqlite reader
* Portable GREATEST/LEAST and various sqlite tweaks
* Drop remaining __ggsql_stat_ columns not consumed by remappings
* Use chrono ToSql/FromSql in sqlite reader
* Add sqlite flags
* Use portable SQL
* Minor tweaks to naming and features
* Add SQL dialect function overriding for duckdb
* cargo fmt
* Fixup grammar: subquery as function argument
* Use dialect GREATEST() in histogram
* cargo fmt
1 parent dd45e3c commit 88baccd

27 files changed

Lines changed: 1976 additions & 400 deletions


.cargo/config.toml

Lines changed: 3 additions & 0 deletions
@@ -1,3 +1,6 @@
+[env]
+LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS"
+
 [target.wasm32-unknown-unknown]
 rustflags = [
     "-C", "link-args=-z stack-size=16777216",

Cargo.toml

Lines changed: 1 addition & 2 deletions
@@ -37,8 +37,7 @@ polars-ops = { version = "0.52", features = ["pivot"] }
 duckdb = { version = "~1.4", features = ["bundled", "vtab-arrow"] }
 arrow = { version = "56", default-features = false, features = ["ipc"] }
 postgres = "0.19"
-sqlx = { version = "0.8", features = ["postgres"] }
-rusqlite = "0.32"
+rusqlite = { version = "0.38", features = ["bundled", "chrono", "functions", "window"] }

 # Writers
 plotters = "0.3"
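Swapping sqlx for rusqlite 0.38 also brings the "chrono" feature, which is what "Use chrono ToSql/FromSql in sqlite reader" refers to: chrono date/time types implement ToSql/FromSql, so temporal values round-trip as ISO-8601 text without hand-rolled formatting. A sketch of the mechanism, not code from this commit:

    use chrono::NaiveDate;
    use rusqlite::Connection;

    fn main() -> rusqlite::Result<()> {
        let conn = Connection::open_in_memory()?;
        conn.execute("CREATE TABLE t (d TEXT)", [])?;
        let d = NaiveDate::from_ymd_opt(2024, 1, 31).unwrap();
        conn.execute("INSERT INTO t (d) VALUES (?1)", [d])?; // ToSql via the chrono feature
        let back: NaiveDate = conn.query_row("SELECT d FROM t", [], |row| row.get(0))?; // FromSql
        assert_eq!(back, d);
        Ok(())
    }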

ggsql-python/src/lib.rs

Lines changed: 6 additions & 0 deletions
@@ -127,6 +127,8 @@ struct PyReaderBridge {
     obj: Py<PyAny>,
 }

+static ANSI_DIALECT: ggsql::reader::AnsiDialect = ggsql::reader::AnsiDialect;
+
 impl Reader for PyReaderBridge {
     fn execute_sql(&self, sql: &str) -> ggsql::Result<DataFrame> {
         Python::attach(|py| {
@@ -161,6 +163,10 @@ impl Reader for PyReaderBridge {
             Ok(())
         })
     }
+
+    fn dialect(&self) -> &dyn ggsql::reader::SqlDialect {
+        &ANSI_DIALECT
+    }
 }

 // ============================================================================
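Readers now expose a dialect() hook rather than a bag of SQL type names; the Python bridge simply reports the ANSI dialect. The full SqlDialect trait is not part of this excerpt, so the following is only a sketch of the pattern the diffs imply: type_name_for() appears in the casting.rs diff below, while greatest() is hypothetical, standing in for the "Use dialect GREATEST() in histogram" item in the commit message.

    // Sketch only: the real trait lives in ggsql::reader, and SqliteDialect is an assumed name.
    #[derive(Clone, Copy)]
    pub enum CastTargetType {
        Integer, // appears in the casting.rs diff; other variants are illustrative
        Double,
    }

    pub trait SqlDialect {
        /// SQL type name to CAST to for a given target, if this backend needs a cast at all.
        fn type_name_for(&self, target: CastTargetType) -> Option<&'static str>;

        /// Portable two-argument GREATEST(); SQLite has no GREATEST, but its scalar
        /// MAX(a, b) behaves the same way, so a dialect can override the spelling.
        fn greatest(&self, a: &str, b: &str) -> String {
            format!("GREATEST({a}, {b})")
        }
    }

    pub struct AnsiDialect;

    impl SqlDialect for AnsiDialect {
        fn type_name_for(&self, _target: CastTargetType) -> Option<&'static str> {
            None // no backend-specific casts
        }
    }

    pub struct SqliteDialect;

    impl SqlDialect for SqliteDialect {
        fn type_name_for(&self, target: CastTargetType) -> Option<&'static str> {
            Some(match target {
                CastTargetType::Integer => "INTEGER",
                CastTargetType::Double => "REAL",
            })
        }

        fn greatest(&self, a: &str, b: &str) -> String {
            format!("MAX({a}, {b})")
        }
    }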

src/Cargo.toml

Lines changed: 3 additions & 3 deletions
@@ -38,7 +38,6 @@ polars-ops.workspace = true
 duckdb = { workspace = true, optional = true }
 arrow = { workspace = true, optional = true }
 postgres = { workspace = true, optional = true }
-sqlx = { workspace = true, optional = true }
 rusqlite = { workspace = true, optional = true }

 # Writers
@@ -77,15 +76,16 @@ proptest.workspace = true
 ureq = "3"

 [features]
-default = ["duckdb", "sqlite", "vegalite", "ipc", "builtin-data"]
+default = ["duckdb", "sqlite", "vegalite", "ipc", "parquet", "builtin-data"]
 ipc = ["polars/ipc"]
 duckdb = ["dep:duckdb", "dep:arrow"]
 polars-sql = ["polars/sql"]
-builtin-data = ["polars/parquet"]
+parquet = ["polars/parquet"]
 postgres = ["dep:postgres"]
 sqlite = ["dep:rusqlite"]
 vegalite = []
 ggplot2 = []
+builtin-data = []
 python = ["dep:pyo3"]
 rest-api = ["dep:axum", "dep:tokio", "dep:tower-http", "dep:tracing", "dep:tracing-subscriber", "duckdb", "vegalite"]
 all-readers = ["duckdb", "postgres", "sqlite", "polars-sql"]
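The sqlx dependency disappears here too, and the sqlite feature remains the switch that pulls in dep:rusqlite; parquet support is split out of builtin-data into its own feature. A generic sketch of how such an optional reader is typically gated (module and names are assumptions, not from the repo):

    // Compiled only when the "sqlite" feature (and so dep:rusqlite) is enabled.
    #[cfg(feature = "sqlite")]
    mod sqlite_reader {
        pub fn backend_name() -> &'static str {
            "sqlite (rusqlite, bundled)"
        }
    }

    fn main() {
        #[cfg(feature = "sqlite")]
        println!("{}", sqlite_reader::backend_name());

        #[cfg(not(feature = "sqlite"))]
        println!("built without the sqlite feature");
    }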

src/execute/casting.rs

Lines changed: 5 additions & 4 deletions
@@ -4,7 +4,8 @@
 //! scale requirements and updating type info accordingly.

 use crate::plot::scale::coerce_dtypes;
-use crate::plot::{CastTargetType, Layer, ParameterValue, Plot, SqlTypeNames};
+use crate::plot::{CastTargetType, Layer, ParameterValue, Plot};
+use crate::reader::SqlDialect;
 use crate::{naming, DataSource};
 use polars::prelude::{DataType, TimeUnit};
 use std::collections::{HashMap, HashSet};
@@ -57,7 +58,7 @@ pub fn literal_to_sql(lit: &ParameterValue) -> String {
 pub fn determine_type_requirements(
     spec: &Plot,
     layer_type_info: &[Vec<TypeInfo>],
-    type_names: &SqlTypeNames,
+    dialect: &dyn SqlDialect,
 ) -> Vec<Vec<TypeRequirement>> {
     use crate::plot::scale::TransformKind;

@@ -123,7 +124,7 @@

         // Check if this specific column needs casting
         if let Some(cast_target) = scale_type.required_cast_type(col_dtype, &target_dtype) {
-            if let Some(sql_type) = type_names.for_target(cast_target) {
+            if let Some(sql_type) = dialect.type_name_for(cast_target) {
                 // Don't add duplicate requirements for same column
                 if !requirements.iter().any(|r| r.column == col_name) {
                     requirements.push(TypeRequirement {
@@ -155,7 +156,7 @@
         };

         if needs_int_cast {
-            if let Some(sql_type) = type_names.for_target(CastTargetType::Integer) {
+            if let Some(sql_type) = dialect.type_name_for(CastTargetType::Integer) {
                 // Don't add duplicate requirements for same column
                 if !requirements.iter().any(|r| r.column == col_name) {
                     requirements.push(TypeRequirement {
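The CAST emission itself is not part of this excerpt, but a TypeRequirement of this shape presumably ends up wrapping the flagged column in a backend-specific cast, along the lines of (column and type here are made up):

    // e.g. cast_expr("price", "REAL") -> "CAST(price AS REAL) AS price"
    fn cast_expr(column: &str, sql_type: &str) -> String {
        format!("CAST({column} AS {sql_type}) AS {column}")
    }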

src/execute/layer.rs

Lines changed: 22 additions & 18 deletions
@@ -4,9 +4,9 @@
 //! transformations, stat transforms, and post-query operations.

 use crate::plot::{
-    AestheticValue, DefaultAestheticValue, Layer, ParameterValue, Scale, Schema, SqlTypeNames,
-    StatResult,
+    AestheticValue, DefaultAestheticValue, Layer, ParameterValue, Scale, Schema, StatResult,
 };
+use crate::reader::SqlDialect;
 use crate::{naming, DataFrame, GgsqlError, Result};
 use polars::prelude::DataType;
 use std::collections::{HashMap, HashSet};
@@ -150,6 +150,17 @@ pub fn apply_remappings_post_query(df: DataFrame, layer: &Layer) -> Result<DataF
         }
     }

+    // Drop any remaining __ggsql_stat_* columns that weren't consumed by remappings.
+    let stat_cols: Vec<String> = df
+        .get_column_names()
+        .into_iter()
+        .filter(|name| naming::is_stat_column(name))
+        .map(|name| name.to_string())
+        .collect();
+    if !stat_cols.is_empty() {
+        df = df.drop_many(stat_cols);
+    }
+
     Ok(df)
 }

@@ -183,14 +194,14 @@ pub fn literal_to_series(name: &str, lit: &ParameterValue, len: usize) -> polars
 /// * `layer` - The layer configuration
 /// * `schema` - The layer's schema (used for column dtype lookup)
 /// * `scales` - All resolved scales
-/// * `type_names` - SQL type names for the database backend
+/// * `dialect` - SQL dialect for the database backend
 pub fn apply_pre_stat_transform(
     query: &str,
     layer: &Layer,
     full_schema: &Schema,
     aesthetic_schema: &Schema,
     scales: &[Scale],
-    type_names: &SqlTypeNames,
+    dialect: &dyn SqlDialect,
 ) -> String {
     let mut transform_exprs: Vec<(String, String)> = vec![];
     let mut transformed_columns: HashSet<String> = HashSet::new();
@@ -226,7 +237,7 @@
         // Get pre-stat SQL transformation from scale type (if applicable)
         // Each scale type's pre_stat_transform_sql() returns None if not applicable
         if let Some(sql) =
-            scale_type.pre_stat_transform_sql(&aes_col_name, &col_dtype, scale, type_names)
+            scale_type.pre_stat_transform_sql(&aes_col_name, &col_dtype, scale, dialect)
         {
             transformed_columns.insert(aes_col_name.clone());
             transform_exprs.push((aes_col_name, sql));
@@ -336,7 +347,7 @@
 /// * `base_query` - The base query from build_layer_base_query
 /// * `schema` - The layer's schema (with min/max from base_query)
 /// * `scales` - All resolved scales
-/// * `type_names` - SQL type names for the database backend
+/// * `dialect` - SQL dialect for the database backend
 /// * `execute_query` - Function to execute queries (needed for some stat transforms)
 ///
 /// # Returns
@@ -347,7 +358,7 @@ pub fn apply_layer_transforms<F>(
     base_query: &str,
     schema: &Schema,
     scales: &[Scale],
-    type_names: &SqlTypeNames,
+    dialect: &dyn SqlDialect,
     execute_query: &F,
 ) -> Result<String>
 where
@@ -387,7 +398,7 @@
         schema,
         &aesthetic_schema,
         scales,
-        type_names,
+        dialect,
     );

     // Build group_by columns from partition_by
@@ -416,6 +427,7 @@
         &group_by,
         &layer.parameters,
         execute_query,
+        dialect,
     )?;

     // Apply literal default remappings from geom defaults (e.g., y2 => 0.0 for bar baseline).
@@ -518,12 +530,6 @@
     if stat_rename_exprs.is_empty() {
         transformed_query
     } else {
-        let stat_col_names: Vec<String> = stat_columns
-            .iter()
-            .map(|s| naming::stat_column(s))
-            .collect();
-        let exclude_clause = format!("EXCLUDE ({})", stat_col_names.join(", "));
-
         // If the transformed query uses CTEs (WITH ... SELECT ...),
         // we can't wrap it in a subquery because Polars SQL doesn't
         // support CTEs inside subqueries. Instead, split into CTE
@@ -536,16 +542,14 @@
             .and_then(super::cte::split_with_query)
         {
             format!(
-                "{}, __ggsql_stat__ AS ({}) SELECT * {}, {} FROM __ggsql_stat__",
+                "{}, __ggsql_stat__ AS ({}) SELECT *, {} FROM __ggsql_stat__",
                 cte_prefix,
                 trailing_select,
-                exclude_clause,
                 stat_rename_exprs.join(", ")
            )
        } else {
            format!(
-                "SELECT * {}, {} FROM ({}) AS __ggsql_stat__",
-                exclude_clause,
+                "SELECT *, {} FROM ({}) AS __ggsql_stat__",
                 stat_rename_exprs.join(", "),
                 transformed_query
             )
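The deleted EXCLUDE clause is the core portability change here: SELECT * EXCLUDE (...) is DuckDB syntax that SQLite does not parse, so the rewritten query keeps the __ggsql_stat_* helper columns and apply_remappings_post_query drops them afterwards (the drop_many hunk above). The non-CTE wrapper now reduces to roughly:

    // Portable wrapper sketch: no EXCLUDE, helper columns are removed post-query.
    fn wrap_stat_query(inner: &str, stat_rename_exprs: &[String]) -> String {
        format!(
            "SELECT *, {} FROM ({}) AS __ggsql_stat__",
            stat_rename_exprs.join(", "),
            inner
        )
    }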

src/execute/mod.rs

Lines changed: 7 additions & 4 deletions
@@ -144,7 +144,10 @@ fn validate(layers: &[Layer], layer_schemas: &[Schema]) -> Result<()> {

 /// Check if an aesthetic value is a null sentinel (explicit removal marker)
 fn is_null_sentinel(value: &AestheticValue) -> bool {
-    matches!(value, AestheticValue::Literal(crate::plot::ParameterValue::Null))
+    matches!(
+        value,
+        AestheticValue::Literal(crate::plot::ParameterValue::Null)
+    )
 }

 /// Merge global mappings into layer aesthetics and expand wildcards
@@ -887,7 +890,7 @@ pub struct PreparedData {
 /// * `reader` - A Reader implementation for executing SQL
 pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result<PreparedData> {
     let execute_query = |sql: &str| reader.execute_sql(sql);
-    let type_names = reader.sql_type_names();
+    let dialect = reader.dialect();

     // Parse once and create SourceTree
     let source_tree = parser::SourceTree::new(query)?;
@@ -1025,7 +1028,7 @@

     // Determine which columns need type casting
     let type_requirements =
-        casting::determine_type_requirements(&specs[0], &layer_type_info, &type_names);
+        casting::determine_type_requirements(&specs[0], &layer_type_info, dialect);

     // Update type info with post-cast dtypes
     // This ensures subsequent schema extraction and scale resolution see the correct types
@@ -1094,7 +1097,7 @@
         &layer_base_queries[idx],
         &layer_schemas[idx],
         &scales,
-        &type_names,
+        dialect,
         &execute_query,
     )?;
     layer_queries.push(layer_query);

src/execute/schema.rs

Lines changed: 2 additions & 2 deletions
@@ -145,7 +145,7 @@ pub fn extract_series_value(

 /// Fetch only column types (no min/max) from a query.
 ///
-/// Uses LIMIT 0 to get schema without reading data.
+/// Uses LIMIT 1 to get schema while minimally reading data.
 /// Returns `(name, dtype, is_discrete)` tuples for each column.
 ///
 /// This is the first phase of the split schema extraction approach:
@@ -157,7 +157,7 @@
     F: Fn(&str) -> Result<DataFrame>,
 {
     let schema_query = format!(
-        "SELECT * FROM ({}) AS {} LIMIT 0",
+        "SELECT * FROM ({}) AS {} LIMIT 1",
         query,
         naming::SCHEMA_ALIAS
     );
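A plausible reading of the LIMIT 0 to LIMIT 1 change (the commit only touches the limit and the doc comment, so this is inference): SQLite columns are dynamically typed, so a row-based reader can only infer concrete types from an actual value, and a zero-row result leaves nothing to inspect. With rusqlite, for instance:

    use rusqlite::{types::ValueRef, Connection};

    fn main() -> rusqlite::Result<()> {
        let conn = Connection::open_in_memory()?;
        conn.execute_batch("CREATE TABLE t (x); INSERT INTO t VALUES (1.5);")?;
        let mut stmt = conn.prepare("SELECT x FROM t LIMIT 1")?;
        let mut rows = stmt.query([])?;
        if let Some(row) = rows.next()? {
            // The value, not the declared column type, says what x holds;
            // with LIMIT 0 there would be no row to look at.
            match row.get_ref(0)? {
                ValueRef::Real(_) => println!("x is REAL"),
                ValueRef::Integer(_) => println!("x is INTEGER"),
                _ => println!("x is something else"),
            }
        }
        Ok(())
    }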

src/parser/builder.rs

Lines changed: 4 additions & 1 deletion
@@ -3350,7 +3350,10 @@ mod tests {

         let literal_node = source.find_node(&root, "(literal_value) @lit").unwrap();
         let parsed = parse_literal_value(&literal_node, &source).unwrap();
-        assert!(matches!(parsed, AestheticValue::Literal(ParameterValue::Null)));
+        assert!(matches!(
+            parsed,
+            AestheticValue::Literal(ParameterValue::Null)
+        ));
     }

     // ========================================

src/plot/layer/geom/bar.rs

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,7 @@ use super::types::get_column_name;
 use super::{DefaultAesthetics, DefaultParam, DefaultParamValue, GeomTrait, GeomType, StatResult};
 use crate::naming;
 use crate::plot::types::{DefaultAestheticValue, ParameterValue};
+use crate::reader::SqlDialect;
 use crate::{DataFrame, GgsqlError, Mappings, Result};

 use super::types::Schema;
@@ -81,6 +82,7 @@ impl GeomTrait for Bar {
         group_by: &[String],
         _parameters: &HashMap<String, ParameterValue>,
         _execute_query: &dyn Fn(&str) -> Result<DataFrame>,
+        _dialect: &dyn SqlDialect,
     ) -> Result<StatResult> {
         stat_bar_count(query, schema, aesthetics, group_by)
     }
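Bar ignores the new parameter, but threading _dialect through GeomTrait's stat hook is what lets other stats use it; per the commit message, the histogram stat is the consumer ("Use dialect GREATEST() in histogram"). That code is not in this excerpt, so the following is purely illustrative, reusing the hypothetical greatest() from the sketch further up:

    // Illustrative: clamp a computed bin-index expression at zero using whatever
    // the backend calls GREATEST (DuckDB: GREATEST(idx, 0); SQLite: MAX(idx, 0)).
    fn clamp_bin_index(dialect: &dyn SqlDialect, idx_expr: &str) -> String {
        dialect.greatest(idx_expr, "0")
    }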
