Skip to content

Commit b5d2278

Browse files
teunbrandclaude
andcommitted
Unify change detection logic for line segmentation and text RLE
Extract common `find_change_starts()` function that finds row indices where any specified column changes value. Uses efficient slice-based comparison instead of shift() to avoid allocation and null handling. Both line segmentation and text font run-length encoding now use the same underlying implementation. Inline former `find_group_boundaries()` stub at its single call site. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 6fa7039 commit b5d2278

1 file changed

Lines changed: 36 additions & 57 deletions

File tree

src/writer/vegalite/layer.rs

Lines changed: 36 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -334,32 +334,29 @@ impl GeomRenderer for PathRenderer {
334334
// Line Renderer
335335
// =============================================================================
336336

337-
/// Find indices where group values change in a DataFrame
337+
/// Find row indices where any of the specified columns change value.
338338
///
339-
/// Returns a sorted vector of indices marking group boundaries. The first element
340-
/// is always 0 (start of first group), followed by indices where any of the
341-
/// group columns change value.
342-
fn find_group_boundaries(df: &DataFrame, group_columns: &[String]) -> Result<Vec<usize>> {
339+
/// Returns a sorted vector starting with 0 (first row), followed by indices
340+
/// where any column value differs from the previous row. Does not include
341+
/// the final boundary (n_rows).
342+
///
343+
/// Used by both line segmentation and text font run-length encoding.
344+
fn find_change_starts(df: &DataFrame, columns: &[String]) -> Result<Vec<usize>> {
343345
use polars::prelude::*;
344346

345347
let n_rows = df.height();
346348

347-
if group_columns.is_empty() {
348-
// No grouping: treat entire dataset as one group
349-
return Ok(vec![0, n_rows]);
350-
}
351-
352-
if n_rows <= 1 {
353-
return Ok(vec![0, n_rows]);
349+
if columns.is_empty() || n_rows <= 1 {
350+
return Ok(vec![0]);
354351
}
355352

356353
// Initialize change mask as all false (no changes)
357354
let mut change_mask = BooleanChunked::full("change_mask".into(), false, n_rows - 1);
358355

359-
// For each group column, OR its change mask into the accumulator
360-
for col_name in group_columns {
356+
// For each column, OR its change mask into the accumulator
357+
for col_name in columns {
361358
let series = df.column(col_name).map_err(|e| {
362-
GgsqlError::InternalError(format!("Group column '{}' not found: {}", col_name, e))
359+
GgsqlError::InternalError(format!("Column '{}' not found: {}", col_name, e))
363360
})?;
364361

365362
// Compare each row with the previous row
@@ -377,17 +374,14 @@ fn find_group_boundaries(df: &DataFrame, group_columns: &[String]) -> Result<Vec
377374
}
378375

379376
// Extract indices where mask is true (offset by 1 since we compared with previous)
380-
let mut boundaries = vec![0];
377+
let mut change_starts = vec![0];
381378
for (idx, changed) in change_mask.into_iter().enumerate() {
382379
if changed == Some(true) {
383-
boundaries.push(idx + 1);
380+
change_starts.push(idx + 1);
384381
}
385382
}
386383

387-
// Add final boundary (end of data)
388-
boundaries.push(n_rows);
389-
390-
Ok(boundaries)
384+
Ok(change_starts)
391385
}
392386

393387
/// Check if an aesthetic varies within any group segment
@@ -472,7 +466,14 @@ impl GeomRenderer for LineRenderer {
472466
.collect();
473467

474468
// Compute group boundaries once (without the material aesthetics)
475-
let group_boundaries = find_group_boundaries(df, &partition_columns)?;
469+
let n_rows = df.height();
470+
let group_boundaries = if partition_columns.is_empty() || n_rows <= 1 {
471+
vec![0, n_rows]
472+
} else {
473+
let mut boundaries = find_change_starts(df, &partition_columns)?;
474+
boundaries.push(n_rows);
475+
boundaries
476+
};
476477

477478
// Check each mapped material aesthetic for within-group variation
478479
for (aesthetic, col) in &mapped_material_aesthetics {
@@ -871,43 +872,29 @@ impl TextRenderer {
871872
return Ok((DataFrame::default(), Vec::new()));
872873
}
873874

874-
// Build boolean mask showing where any font property changes
875-
let mut changed = BooleanChunked::full("changed".into(), false, nrows);
876-
let mut font_columns: HashMap<&str, &polars::prelude::Column> = HashMap::new();
877-
878-
for aesthetic in [
875+
// Collect font property column names that exist in the DataFrame
876+
let font_aesthetics = [
879877
"typeface",
880878
"fontweight",
881879
"italic",
882880
"hjust",
883881
"vjust",
884882
"rotation",
885-
] {
886-
if let Ok(col) = df.column(&naming::aesthetic_column(aesthetic)) {
887-
let col_changed = col.not_equal(&col.shift(1)).map_err(|e| {
888-
GgsqlError::InternalError(format!("Failed to compare column: {}", e))
889-
})?;
890-
changed = &changed | &col_changed;
891-
font_columns.insert(aesthetic, col);
892-
}
893-
}
883+
];
894884

895-
// Extract change indices (where mask is true)
896-
// shift() creates nulls at position 0, which we treat as a change point
897-
let mut change_indices: Vec<usize> = Vec::new();
898-
for (i, val) in changed.iter().enumerate() {
899-
if val == Some(true) || val.is_none() {
900-
// Treat null (from shift) or true as change point
901-
change_indices.push(i);
885+
let mut font_column_names = Vec::new();
886+
let mut font_columns: HashMap<&str, &polars::prelude::Column> = HashMap::new();
887+
888+
for aesthetic in font_aesthetics {
889+
let col_name = naming::aesthetic_column(aesthetic);
890+
if let Ok(col) = df.column(&col_name) {
891+
font_column_names.push(col_name);
892+
font_columns.insert(aesthetic, col);
902893
}
903894
}
904895

905-
// First row is always a change point (shift comparison is null)
906-
if !change_indices.is_empty() && change_indices[0] != 0 {
907-
change_indices.insert(0, 0);
908-
} else if change_indices.is_empty() {
909-
change_indices.push(0);
910-
}
896+
// Find indices where any font property changes
897+
let change_indices = find_change_starts(df, &font_column_names)?;
911898

912899
// Calculate run lengths
913900
let run_lengths: Vec<usize> = change_indices
@@ -924,14 +911,6 @@ impl TextRenderer {
924911
"indices".into(),
925912
change_indices.iter().map(|&i| i as u32).collect(),
926913
);
927-
let font_aesthetics = [
928-
"typeface",
929-
"fontweight",
930-
"italic",
931-
"hjust",
932-
"vjust",
933-
"rotation",
934-
];
935914

936915
let mut result_cols = Vec::new();
937916
for aesthetic in font_aesthetics {

0 commit comments

Comments
 (0)