@@ -177,6 +177,7 @@ fn instantiate_static_filter(
177177 // Float primitive types (use ordered wrappers for Hash/Eq)
178178 DataType :: Float32 => Ok ( Arc :: new ( Float32StaticFilter :: try_new ( & in_array) ?) ) ,
179179 DataType :: Float64 => Ok ( Arc :: new ( Float64StaticFilter :: try_new ( & in_array) ?) ) ,
180+ // Utf8 types
180181 DataType :: Utf8 => Ok ( Arc :: new ( Utf8StaticFilter :: try_new ( & in_array) ?) ) ,
181182 DataType :: LargeUtf8 => Ok ( Arc :: new ( LargeUtf8StaticFilter :: try_new ( & in_array) ?) ) ,
182183 DataType :: Utf8View => Ok ( Arc :: new ( Utf8ViewStaticFilter :: try_new ( & in_array) ?) ) ,
@@ -187,6 +188,7 @@ fn instantiate_static_filter(
187188 }
188189}
189190
191+ /// Generates specialized [`StaticFilter`] implementations for string types (Utf8, LargeUtf8, Utf8View).
190192macro_rules! string_static_filter {
191193 ( $Name: ident, $ArrayType: ty, $TryDowncast: ident $( :: <$offset: ty>) ?) => {
192194 struct $Name {
@@ -4110,41 +4112,25 @@ mod tests {
41104112 ) ;
41114113 }
41124114
4113- let string_cases = vec ! [
4114- (
4115- "Utf8" ,
4116- Arc :: new( StringArray :: from( vec![ "a" , "b" , "c" ] ) ) as ArrayRef ,
4117- Arc :: new( StringArray :: from( vec![ "a" , "d" , "b" ] ) ) as ArrayRef ,
4118- ) ,
4119- (
4120- "LargeUtf8" ,
4121- Arc :: new( LargeStringArray :: from( vec![ "a" , "b" , "c" ] ) ) as ArrayRef ,
4122- Arc :: new( LargeStringArray :: from( vec![ "a" , "d" , "b" ] ) ) as ArrayRef ,
4123- ) ,
4124- (
4125- "Utf8View" ,
4126- Arc :: new( StringViewArray :: from( vec![ "a" , "b" , "c" ] ) ) as ArrayRef ,
4127- Arc :: new( StringViewArray :: from( vec![ "a" , "d" , "b" ] ) ) as ArrayRef ,
4128- ) ,
4129- ] ;
4130-
4131- for ( name, in_array, needle) in string_cases {
4132- assert_eq ! (
4133- expected,
4134- eval_in_list_from_array( Arc :: clone( & needle) , Arc :: clone( & in_array) , ) ?,
4135- "same-type failed for {name}"
4136- ) ;
4137-
4138- assert_eq ! (
4139- expected,
4140- eval_in_list_from_array( wrap_in_dict( needle) , in_array) ?,
4141- "dict-needle failed for {name}"
4142- ) ;
4143- }
4144-
4115+ // Utf8 (dispatched to Utf8StaticFilter by instantiate_static_filter)
41454116 let utf8_in = Arc :: new ( StringArray :: from ( vec ! [ "a" , "b" , "c" ] ) ) as ArrayRef ;
41464117 let utf8_needle = Arc :: new ( StringArray :: from ( vec ! [ "a" , "d" , "b" ] ) ) as ArrayRef ;
41474118
4119+ // Utf8 in_array, Utf8 needle
4120+ assert_eq ! (
4121+ expected,
4122+ eval_in_list_from_array( Arc :: clone( & utf8_needle) , Arc :: clone( & utf8_in) , ) ?
4123+ ) ;
4124+
4125+ // Utf8 in_array, Dict(Utf8) needle
4126+ assert_eq ! (
4127+ expected,
4128+ eval_in_list_from_array(
4129+ wrap_in_dict( Arc :: clone( & utf8_needle) ) ,
4130+ Arc :: clone( & utf8_in) ,
4131+ ) ?
4132+ ) ;
4133+
41484134 // Dict(Utf8) in_array, Dict(Utf8) needle: the #20937 bug
41494135 assert_eq ! (
41504136 expected,
@@ -4248,27 +4234,4 @@ mod tests {
42484234
42494235 Ok ( ( ) )
42504236 }
4251-
4252- #[ test]
4253- fn test_utf8_static_filter_avoids_string_copies ( ) -> Result < ( ) > {
4254- let in_array = Arc :: new ( StringArray :: from ( vec ! [
4255- Some ( "alpha" ) ,
4256- Some ( "beta" ) ,
4257- Some ( "alpha" ) ,
4258- None ,
4259- ] ) ) as ArrayRef ;
4260-
4261- let filter = Utf8StaticFilter :: try_new ( & in_array) ?;
4262-
4263- assert_eq ! ( filter. null_count( ) , 1 ) ;
4264- assert_eq ! ( filter. map. len( ) , 2 ) ;
4265-
4266- let needle = Arc :: new ( StringArray :: from ( vec ! [ Some ( "alpha" ) , Some ( "gamma" ) , None ] ) )
4267- as ArrayRef ;
4268-
4269- let result = filter. contains ( needle. as_ref ( ) , false ) ?;
4270- assert_eq ! ( result, BooleanArray :: from( vec![ Some ( true ) , None , None ] ) ) ;
4271-
4272- Ok ( ( ) )
4273- }
42744237}
0 commit comments