Skip to content

Commit c4ee2b7

Browse files
committed
add tests
1 parent faf60e4 commit c4ee2b7

2 files changed

Lines changed: 136 additions & 55 deletions

File tree

datafusion/physical-expr/src/expressions/in_list.rs

Lines changed: 18 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ fn instantiate_static_filter(
177177
// Float primitive types (use ordered wrappers for Hash/Eq)
178178
DataType::Float32 => Ok(Arc::new(Float32StaticFilter::try_new(&in_array)?)),
179179
DataType::Float64 => Ok(Arc::new(Float64StaticFilter::try_new(&in_array)?)),
180+
// Utf8 types
180181
DataType::Utf8 => Ok(Arc::new(Utf8StaticFilter::try_new(&in_array)?)),
181182
DataType::LargeUtf8 => Ok(Arc::new(LargeUtf8StaticFilter::try_new(&in_array)?)),
182183
DataType::Utf8View => Ok(Arc::new(Utf8ViewStaticFilter::try_new(&in_array)?)),
@@ -187,6 +188,7 @@ fn instantiate_static_filter(
187188
}
188189
}
189190

191+
/// Generates specialized [`StaticFilter`] implementations for string types (Utf8, LargeUtf8, Utf8View).
190192
macro_rules! string_static_filter {
191193
($Name:ident, $ArrayType:ty, $TryDowncast:ident $(::<$offset:ty>)?) => {
192194
struct $Name {
@@ -4110,41 +4112,25 @@ mod tests {
41104112
);
41114113
}
41124114

4113-
let string_cases = vec![
4114-
(
4115-
"Utf8",
4116-
Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef,
4117-
Arc::new(StringArray::from(vec!["a", "d", "b"])) as ArrayRef,
4118-
),
4119-
(
4120-
"LargeUtf8",
4121-
Arc::new(LargeStringArray::from(vec!["a", "b", "c"])) as ArrayRef,
4122-
Arc::new(LargeStringArray::from(vec!["a", "d", "b"])) as ArrayRef,
4123-
),
4124-
(
4125-
"Utf8View",
4126-
Arc::new(StringViewArray::from(vec!["a", "b", "c"])) as ArrayRef,
4127-
Arc::new(StringViewArray::from(vec!["a", "d", "b"])) as ArrayRef,
4128-
),
4129-
];
4130-
4131-
for (name, in_array, needle) in string_cases {
4132-
assert_eq!(
4133-
expected,
4134-
eval_in_list_from_array(Arc::clone(&needle), Arc::clone(&in_array),)?,
4135-
"same-type failed for {name}"
4136-
);
4137-
4138-
assert_eq!(
4139-
expected,
4140-
eval_in_list_from_array(wrap_in_dict(needle), in_array)?,
4141-
"dict-needle failed for {name}"
4142-
);
4143-
}
4144-
4115+
// Utf8 (handled by the specialized Utf8StaticFilter)
41454116
let utf8_in = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef;
41464117
let utf8_needle = Arc::new(StringArray::from(vec!["a", "d", "b"])) as ArrayRef;
41474118

4119+
// Utf8 in_array, Utf8 needle
4120+
assert_eq!(
4121+
expected,
4122+
eval_in_list_from_array(Arc::clone(&utf8_needle), Arc::clone(&utf8_in),)?
4123+
);
4124+
4125+
// Utf8 in_array, Dict(Utf8) needle
4126+
assert_eq!(
4127+
expected,
4128+
eval_in_list_from_array(
4129+
wrap_in_dict(Arc::clone(&utf8_needle)),
4130+
Arc::clone(&utf8_in),
4131+
)?
4132+
);
4133+
41484134
// Dict(Utf8) in_array, Dict(Utf8) needle: the #20937 bug
41494135
assert_eq!(
41504136
expected,
@@ -4248,27 +4234,4 @@ mod tests {
42484234

42494235
Ok(())
42504236
}
4251-
4252-
#[test]
4253-
fn test_utf8_static_filter_avoids_string_copies() -> Result<()> {
4254-
let in_array = Arc::new(StringArray::from(vec![
4255-
Some("alpha"),
4256-
Some("beta"),
4257-
Some("alpha"),
4258-
None,
4259-
])) as ArrayRef;
4260-
4261-
let filter = Utf8StaticFilter::try_new(&in_array)?;
4262-
4263-
assert_eq!(filter.null_count(), 1);
4264-
assert_eq!(filter.map.len(), 2);
4265-
4266-
let needle = Arc::new(StringArray::from(vec![Some("alpha"), Some("gamma"), None]))
4267-
as ArrayRef;
4268-
4269-
let result = filter.contains(needle.as_ref(), false)?;
4270-
assert_eq!(result, BooleanArray::from(vec![Some(true), None, None]));
4271-
4272-
Ok(())
4273-
}
42744237
}

datafusion/sqllogictest/test_files/expr.slt

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,86 @@ SELECT 'c' NOT IN ('a','b',NULL)
10671067
----
10681068
NULL
10691069

1070+
query B
1071+
SELECT arrow_cast('a', 'Utf8View') IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'))
1072+
----
1073+
true
1074+
1075+
query B
1076+
SELECT arrow_cast('c', 'Utf8View') IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'))
1077+
----
1078+
false
1079+
1080+
query B
1081+
SELECT arrow_cast('a', 'Utf8View') NOT IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'))
1082+
----
1083+
false
1084+
1085+
query B
1086+
SELECT arrow_cast('c', 'Utf8View') NOT IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'))
1087+
----
1088+
true
1089+
1090+
query B
1091+
SELECT arrow_cast('a', 'Utf8View') IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'), NULL)
1092+
----
1093+
true
1094+
1095+
query B
1096+
SELECT arrow_cast('c', 'Utf8View') IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'), NULL)
1097+
----
1098+
NULL
1099+
1100+
query B
1101+
SELECT arrow_cast('a', 'Utf8View') NOT IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'), NULL)
1102+
----
1103+
false
1104+
1105+
query B
1106+
SELECT arrow_cast('c', 'Utf8View') NOT IN (arrow_cast('a', 'Utf8View'), arrow_cast('b', 'Utf8View'), NULL)
1107+
----
1108+
NULL
1109+
1110+
query B
1111+
SELECT arrow_cast('a', 'LargeUtf8') IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'))
1112+
----
1113+
true
1114+
1115+
query B
1116+
SELECT arrow_cast('c', 'LargeUtf8') IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'))
1117+
----
1118+
false
1119+
1120+
query B
1121+
SELECT arrow_cast('a', 'LargeUtf8') NOT IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'))
1122+
----
1123+
false
1124+
1125+
query B
1126+
SELECT arrow_cast('c', 'LargeUtf8') NOT IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'))
1127+
----
1128+
true
1129+
1130+
query B
1131+
SELECT arrow_cast('a', 'LargeUtf8') IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'), NULL)
1132+
----
1133+
true
1134+
1135+
query B
1136+
SELECT arrow_cast('c', 'LargeUtf8') IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'), NULL)
1137+
----
1138+
NULL
1139+
1140+
query B
1141+
SELECT arrow_cast('a', 'LargeUtf8') NOT IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'), NULL)
1142+
----
1143+
false
1144+
1145+
query B
1146+
SELECT arrow_cast('c', 'LargeUtf8') NOT IN (arrow_cast('a', 'LargeUtf8'), arrow_cast('b', 'LargeUtf8'), NULL)
1147+
----
1148+
NULL
1149+
10701150
query B
10711151
SELECT 0 IN (0,1,2)
10721152
----
@@ -1398,11 +1478,49 @@ SELECT struct(1, 1) IN (struct(NULL, NULL))
13981478
----
13991479
false
14001480

1481+
statement ok
1482+
CREATE OR REPLACE TABLE in_list_str_test(s TEXT) AS VALUES ('alpha'), ('beta'), ('gamma'), (NULL);
1483+
1484+
query TB
1485+
SELECT s, s IN ('alpha', 'beta') FROM in_list_str_test ORDER BY s;
1486+
----
1487+
alpha true
1488+
beta true
1489+
gamma false
1490+
NULL NULL
1491+
1492+
query TB
1493+
SELECT s, s NOT IN ('alpha', 'beta') FROM in_list_str_test ORDER BY s;
1494+
----
1495+
alpha false
1496+
beta false
1497+
gamma true
1498+
NULL NULL
1499+
1500+
query TB
1501+
SELECT s, s IN ('alpha', NULL) FROM in_list_str_test ORDER BY s;
1502+
----
1503+
alpha true
1504+
beta NULL
1505+
gamma NULL
1506+
NULL NULL
1507+
1508+
query TB
1509+
SELECT s, s NOT IN ('alpha', NULL) FROM in_list_str_test ORDER BY s;
1510+
----
1511+
alpha false
1512+
beta NULL
1513+
gamma NULL
1514+
NULL NULL
1515+
14011516
# Cleanup test tables
14021517

14031518
statement ok
14041519
DROP TABLE in_list_test;
14051520

1521+
statement ok
1522+
DROP TABLE in_list_str_test;
1523+
14061524
statement ok
14071525
DROP TABLE in_list_col_test;
14081526

0 commit comments

Comments
 (0)