@@ -1863,6 +1863,8 @@ ORDER BY idx
18631863 ctx.match_line_number,
18641864 ctx.snippet_start_line_number,
18651865 ctx.match_spans,
1866+ pf.highlight_pattern,
1867+ pf.highlight_case_sensitive,
18661868 pf.branches,
18671869 pf.live_branches,
18681870 pf.is_historical,
@@ -1968,14 +1970,20 @@ ORDER BY idx
19681970 let best_start_line = chunk_start_line
19691971 . saturating_add ( best_row. snippet_start_line_number - 1 ) ;
19701972 let best_end_line = snippet_end_line ( & best_row. content_text , best_start_line) ;
1973+ let best_match_spans = normalize_literal_match_spans (
1974+ & best_row. content_text ,
1975+ & best_row. match_spans . 0 ,
1976+ & best_row. highlight_pattern ,
1977+ best_row. highlight_case_sensitive ,
1978+ ) ;
19711979
19721980 let mut snippets = Vec :: new ( ) ;
19731981 snippets. push ( SearchSnippet {
19741982 start_line : best_start_line,
19751983 end_line : best_end_line,
19761984 match_line : best_match_line,
19771985 content_text : best_row. content_text . clone ( ) ,
1978- match_spans : best_row . match_spans . 0 . clone ( ) ,
1986+ match_spans : best_match_spans . clone ( ) ,
19791987 } ) ;
19801988
19811989 for row in entries_iter {
@@ -1985,12 +1993,18 @@ ORDER BY idx
19851993 let snippet_start =
19861994 chunk_start_line. saturating_add ( row. snippet_start_line_number - 1 ) ;
19871995 let snippet_end = snippet_end_line ( & row. content_text , snippet_start) ;
1996+ let match_spans = normalize_literal_match_spans (
1997+ & row. content_text ,
1998+ & row. match_spans . 0 ,
1999+ & row. highlight_pattern ,
2000+ row. highlight_case_sensitive ,
2001+ ) ;
19882002 snippets. push ( SearchSnippet {
19892003 start_line : snippet_start,
19902004 end_line : snippet_end,
19912005 match_line : snippet_match,
19922006 content_text : row. content_text ,
1993- match_spans : row . match_spans . 0 ,
2007+ match_spans,
19942008 } ) ;
19952009 }
19962010
@@ -2005,11 +2019,11 @@ ORDER BY idx
20052019 . or_else ( || merged_snippets. first ( ) . cloned ( ) )
20062020 . unwrap_or_else ( || SearchSnippet {
20072021 start_line : best_start_line,
2008- end_line : best_end_line,
2009- match_line : best_match_line,
2010- content_text : best_row. content_text . clone ( ) ,
2011- match_spans : best_row . match_spans . 0 . clone ( ) ,
2012- } ) ;
2022+ end_line : best_end_line,
2023+ match_line : best_match_line,
2024+ content_text : best_row. content_text . clone ( ) ,
2025+ match_spans : best_match_spans ,
2026+ } ) ;
20132027
20142028 SearchResult {
20152029 repository : best_row. repository ,
@@ -2758,6 +2772,8 @@ struct SearchResultRow {
27582772 match_line_number : i32 ,
27592773 snippet_start_line_number : i32 ,
27602774 match_spans : Json < Vec < SearchMatchSpan > > ,
2775+ highlight_pattern : String ,
2776+ highlight_case_sensitive : bool ,
27612777 branches : Vec < String > ,
27622778 live_branches : Vec < String > ,
27632779 is_historical : bool ,
@@ -2845,6 +2861,100 @@ fn snippet_signal_score(text: &str, spans: &[SearchMatchSpan]) -> (i32, i32, i32
28452861 ( exact_count, span_count, signal_count)
28462862}
28472863
2864+ fn normalize_literal_match_spans (
2865+ text : & str ,
2866+ spans : & [ SearchMatchSpan ] ,
2867+ pattern : & str ,
2868+ case_sensitive : bool ,
2869+ ) -> Vec < SearchMatchSpan > {
2870+ let Some ( terms) = parse_plain_highlight_pattern ( pattern) else {
2871+ return spans. to_vec ( ) ;
2872+ } ;
2873+
2874+ let Some ( recomputed) = find_literal_match_spans ( text, & terms, case_sensitive) else {
2875+ return spans. to_vec ( ) ;
2876+ } ;
2877+
2878+ if recomputed. is_empty ( ) {
2879+ spans. to_vec ( )
2880+ } else {
2881+ recomputed
2882+ }
2883+ }
2884+
/// Splits a highlight pattern into its literal alternatives, or returns `None`
/// when the pattern is anything other than `literal|literal|...`.
///
/// Accepted input:
/// - ordinary characters, taken verbatim;
/// - a backslash followed by one of `\ . + * ? ^ $ ( ) [ ] { } |`, which
///   contributes that character literally;
/// - an unescaped `|`, which separates alternatives.
///
/// Rejected input (`None`):
/// - an unescaped regex metacharacter (`. + * ? ^ $ ( ) [ ] { }`), because the
///   pattern then has regex semantics that a literal search cannot reproduce;
/// - a backslash followed by any other character (e.g. `\d`), or a trailing
///   backslash;
/// - an empty alternative (leading/trailing/double `|`) or an empty pattern.
fn parse_plain_highlight_pattern(pattern: &str) -> Option<Vec<String>> {
    let mut terms = Vec::new();
    let mut current = String::new();
    let mut chars = pattern.chars();

    // `chars.next()` is also called inside the loop body to consume the
    // character following a backslash, so a plain `for` loop is not usable.
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // A trailing backslash has nothing to escape: not a plain pattern.
                let escaped = chars.next()?;
                match escaped {
                    '\\' | '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{'
                    | '}' | '|' => current.push(escaped),
                    // Class shorthands, anchors, etc. (`\d`, `\b`, ...) are regex syntax.
                    _ => return None,
                }
            }
            '|' => {
                if current.is_empty() {
                    return None;
                }
                terms.push(std::mem::take(&mut current));
            }
            // Unescaped metacharacters mean the pattern is a real regex; treating
            // them as literal text would produce wrong highlight spans.
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' => {
                return None;
            }
            other => current.push(other),
        }
    }

    if current.is_empty() {
        return None;
    }
    terms.push(current);
    Some(terms)
}
2916+
2917+ fn find_literal_match_spans (
2918+ text : & str ,
2919+ terms : & [ String ] ,
2920+ case_sensitive : bool ,
2921+ ) -> Option < Vec < SearchMatchSpan > > {
2922+ if terms. is_empty ( ) {
2923+ return Some ( Vec :: new ( ) ) ;
2924+ }
2925+
2926+ let mut spans = Vec :: new ( ) ;
2927+
2928+ if case_sensitive {
2929+ for term in terms {
2930+ for ( start, matched) in text. match_indices ( term) {
2931+ spans. push ( SearchMatchSpan {
2932+ start,
2933+ end : start + matched. len ( ) ,
2934+ } ) ;
2935+ }
2936+ }
2937+ } else {
2938+ if !text. is_ascii ( ) || terms. iter ( ) . any ( |term| !term. is_ascii ( ) ) {
2939+ return None ;
2940+ }
2941+ let lower_text = text. to_ascii_lowercase ( ) ;
2942+ for term in terms {
2943+ let lower_term = term. to_ascii_lowercase ( ) ;
2944+ for ( start, matched) in lower_text. match_indices ( & lower_term) {
2945+ spans. push ( SearchMatchSpan {
2946+ start,
2947+ end : start + matched. len ( ) ,
2948+ } ) ;
2949+ }
2950+ }
2951+ }
2952+
2953+ spans. sort_by ( |a, b| a. start . cmp ( & b. start ) . then_with ( || a. end . cmp ( & b. end ) ) ) ;
2954+ spans. dedup ( ) ;
2955+ Some ( spans)
2956+ }
2957+
28482958fn count_exact_match_spans ( text : & str , spans : & [ SearchMatchSpan ] ) -> i32 {
28492959 let mut count = 0 ;
28502960 let bytes = text. as_bytes ( ) ;
@@ -3130,6 +3240,77 @@ mod tests {
31303240 assert_eq ! ( merged_snippet. match_spans, vec![ SearchMatchSpan { start: 14 , end: 19 } ] ) ;
31313241 }
31323242
#[test]
fn merged_snippets_preserve_zero_based_end_exclusive_phrase_spans() {
    // Spans are zero-based byte offsets with an exclusive end. In the first
    // snippet "line20\n" occupies bytes 0..7 and "seek " bytes 7..12, so
    // 12..28 is exactly "failed for block". The fixture text must not contain
    // extra whitespace after the newlines or these offsets no longer line up.
    let snippet_a = SearchSnippet {
        start_line: 20,
        end_line: 22,
        match_line: 21,
        content_text: "line20\nseek failed for block\nline22".to_string(),
        match_spans: vec![SearchMatchSpan { start: 12, end: 28 }],
    };
    let snippet_b = SearchSnippet {
        start_line: 23,
        end_line: 24,
        match_line: 23,
        content_text: "write block with checksum\nline24".to_string(),
        match_spans: vec![SearchMatchSpan { start: 0, end: 5 }],
    };

    let merged = merge_overlapping_snippets(vec![snippet_a, snippet_b]);
    let merged_snippet = &merged[0];

    // Slice the merged text with each span to prove the offsets were carried
    // over (and shifted) correctly.
    let highlighted = |index: usize| {
        let span = &merged_snippet.match_spans[index];
        &merged_snippet.content_text[span.start..span.end]
    };

    assert_eq!(highlighted(0), "failed for block");
    assert_eq!(highlighted(1), "write");
}
3272+
#[test]
fn parse_plain_highlight_pattern_round_trips_escaped_literals() {
    // Two alternatives; the second one carries escaped parentheses that must
    // come back as literal characters.
    let parsed = parse_plain_highlight_pattern(r#"failed for block|pg_fatal\(\)"#);
    let terms = parsed.expect("pattern should parse as plain literals");

    let expected: Vec<String> = ["failed for block", "pg_fatal()"]
        .iter()
        .map(|literal| literal.to_string())
        .collect();
    assert_eq!(terms, expected);
}
3282+
#[test]
fn parse_plain_highlight_pattern_rejects_regex_constructs() {
    // `.*` is regex syntax, so the pattern must not be treated as a literal.
    let parsed = parse_plain_highlight_pattern("foo.*bar");
    assert!(parsed.is_none());
}
3287+
#[test]
fn normalize_literal_match_spans_recomputes_shifted_plain_phrase() {
    let phrase = "failed for block";
    let text = r#"pg_fatal("seek failed for block %u", blockno);"#;
    // Deliberately misaligned input span: the phrase does not start at byte 17.
    let original = vec![SearchMatchSpan { start: 17, end: 33 }];

    let normalized = normalize_literal_match_spans(text, &original, "failed for block", true);

    // The recomputed span must sit exactly on the phrase inside `text`.
    let expected_start = text.find(phrase).expect("phrase should exist");
    let expected = vec![SearchMatchSpan {
        start: expected_start,
        end: expected_start + phrase.len(),
    }];
    assert_eq!(normalized, expected);
}
3305+
#[test]
fn normalize_literal_match_spans_preserves_regex_patterns() {
    // A genuine regex pattern must leave the supplied spans untouched.
    let text = "abcde failed";
    let original = vec![SearchMatchSpan { start: 5, end: 11 }];

    let normalized = normalize_literal_match_spans(text, &original, "fail.*", true);

    assert_eq!(normalized, original);
}
3313+
31333314 #[ test]
31343315 fn multi_term_search_builds_intersect_filter ( ) {
31353316 let request = TextSearchRequest :: from_query_str ( "polly LinkAllPasses" ) . unwrap ( ) ;
0 commit comments