Skip to content

Commit 370f6fb

Browse files
committed
Fix off-by-one pattern highlights
1 parent 06c300b commit 370f6fb

2 files changed

Lines changed: 250 additions & 7 deletions

File tree

src/db/postgres.rs

Lines changed: 188 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,8 @@ ORDER BY idx
18631863
ctx.match_line_number,
18641864
ctx.snippet_start_line_number,
18651865
ctx.match_spans,
1866+
pf.highlight_pattern,
1867+
pf.highlight_case_sensitive,
18661868
pf.branches,
18671869
pf.live_branches,
18681870
pf.is_historical,
@@ -1968,14 +1970,20 @@ ORDER BY idx
19681970
let best_start_line = chunk_start_line
19691971
.saturating_add(best_row.snippet_start_line_number - 1);
19701972
let best_end_line = snippet_end_line(&best_row.content_text, best_start_line);
1973+
let best_match_spans = normalize_literal_match_spans(
1974+
&best_row.content_text,
1975+
&best_row.match_spans.0,
1976+
&best_row.highlight_pattern,
1977+
best_row.highlight_case_sensitive,
1978+
);
19711979

19721980
let mut snippets = Vec::new();
19731981
snippets.push(SearchSnippet {
19741982
start_line: best_start_line,
19751983
end_line: best_end_line,
19761984
match_line: best_match_line,
19771985
content_text: best_row.content_text.clone(),
1978-
match_spans: best_row.match_spans.0.clone(),
1986+
match_spans: best_match_spans.clone(),
19791987
});
19801988

19811989
for row in entries_iter {
@@ -1985,12 +1993,18 @@ ORDER BY idx
19851993
let snippet_start =
19861994
chunk_start_line.saturating_add(row.snippet_start_line_number - 1);
19871995
let snippet_end = snippet_end_line(&row.content_text, snippet_start);
1996+
let match_spans = normalize_literal_match_spans(
1997+
&row.content_text,
1998+
&row.match_spans.0,
1999+
&row.highlight_pattern,
2000+
row.highlight_case_sensitive,
2001+
);
19882002
snippets.push(SearchSnippet {
19892003
start_line: snippet_start,
19902004
end_line: snippet_end,
19912005
match_line: snippet_match,
19922006
content_text: row.content_text,
1993-
match_spans: row.match_spans.0,
2007+
match_spans,
19942008
});
19952009
}
19962010

@@ -2005,11 +2019,11 @@ ORDER BY idx
20052019
.or_else(|| merged_snippets.first().cloned())
20062020
.unwrap_or_else(|| SearchSnippet {
20072021
start_line: best_start_line,
2008-
end_line: best_end_line,
2009-
match_line: best_match_line,
2010-
content_text: best_row.content_text.clone(),
2011-
match_spans: best_row.match_spans.0.clone(),
2012-
});
2022+
end_line: best_end_line,
2023+
match_line: best_match_line,
2024+
content_text: best_row.content_text.clone(),
2025+
match_spans: best_match_spans,
2026+
});
20132027

20142028
SearchResult {
20152029
repository: best_row.repository,
@@ -2758,6 +2772,8 @@ struct SearchResultRow {
27582772
match_line_number: i32,
27592773
snippet_start_line_number: i32,
27602774
match_spans: Json<Vec<SearchMatchSpan>>,
2775+
highlight_pattern: String,
2776+
highlight_case_sensitive: bool,
27612777
branches: Vec<String>,
27622778
live_branches: Vec<String>,
27632779
is_historical: bool,
@@ -2845,6 +2861,100 @@ fn snippet_signal_score(text: &str, spans: &[SearchMatchSpan]) -> (i32, i32, i32
28452861
(exact_count, span_count, signal_count)
28462862
}
28472863

2864+
fn normalize_literal_match_spans(
2865+
text: &str,
2866+
spans: &[SearchMatchSpan],
2867+
pattern: &str,
2868+
case_sensitive: bool,
2869+
) -> Vec<SearchMatchSpan> {
2870+
let Some(terms) = parse_plain_highlight_pattern(pattern) else {
2871+
return spans.to_vec();
2872+
};
2873+
2874+
let Some(recomputed) = find_literal_match_spans(text, &terms, case_sensitive) else {
2875+
return spans.to_vec();
2876+
};
2877+
2878+
if recomputed.is_empty() {
2879+
spans.to_vec()
2880+
} else {
2881+
recomputed
2882+
}
2883+
}
2884+
2885+
/// Interpret `pattern` as a '|'-separated list of regex-escaped literals.
///
/// Returns `Some(terms)` only when every character is an ordinary literal or
/// a backslash-escaped regex metacharacter. Any *unescaped* metacharacter
/// (`.`, `*`, `(` …) means the pattern is a real regex, so we return `None`
/// and callers fall back to the spans stored with the row. Empty alternatives
/// (`a|`, `|b`, empty pattern) are also rejected.
fn parse_plain_highlight_pattern(pattern: &str) -> Option<Vec<String>> {
    let mut terms = Vec::new();
    let mut current = String::new();
    let mut chars = pattern.chars();

    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // An escape must be followed by a known metacharacter; the
                // escaped pair stands for that literal character. A trailing
                // backslash (chars.next() == None) rejects via `?`.
                let escaped = chars.next()?;
                match escaped {
                    '\\' | '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{'
                    | '}' | '|' => current.push(escaped),
                    _ => return None,
                }
            }
            '|' => {
                // Alternation separator; an empty alternative is not a plain
                // literal pattern.
                if current.is_empty() {
                    return None;
                }
                terms.push(std::mem::take(&mut current));
            }
            // BUGFIX: unescaped regex metacharacters previously fell through
            // to the literal arm, so patterns like "foo.*bar" were treated as
            // plain literals and could produce wrong highlights. A real regex
            // must be rejected here so the caller keeps the stored spans.
            '.' | '+' | '*' | '?' | '^' | '$' | '(' | ')' | '[' | ']' | '{' | '}' => {
                return None;
            }
            other => current.push(other),
        }
    }

    if current.is_empty() {
        return None;
    }
    terms.push(current);
    Some(terms)
}
2916+
2917+
fn find_literal_match_spans(
2918+
text: &str,
2919+
terms: &[String],
2920+
case_sensitive: bool,
2921+
) -> Option<Vec<SearchMatchSpan>> {
2922+
if terms.is_empty() {
2923+
return Some(Vec::new());
2924+
}
2925+
2926+
let mut spans = Vec::new();
2927+
2928+
if case_sensitive {
2929+
for term in terms {
2930+
for (start, matched) in text.match_indices(term) {
2931+
spans.push(SearchMatchSpan {
2932+
start,
2933+
end: start + matched.len(),
2934+
});
2935+
}
2936+
}
2937+
} else {
2938+
if !text.is_ascii() || terms.iter().any(|term| !term.is_ascii()) {
2939+
return None;
2940+
}
2941+
let lower_text = text.to_ascii_lowercase();
2942+
for term in terms {
2943+
let lower_term = term.to_ascii_lowercase();
2944+
for (start, matched) in lower_text.match_indices(&lower_term) {
2945+
spans.push(SearchMatchSpan {
2946+
start,
2947+
end: start + matched.len(),
2948+
});
2949+
}
2950+
}
2951+
}
2952+
2953+
spans.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end)));
2954+
spans.dedup();
2955+
Some(spans)
2956+
}
2957+
28482958
fn count_exact_match_spans(text: &str, spans: &[SearchMatchSpan]) -> i32 {
28492959
let mut count = 0;
28502960
let bytes = text.as_bytes();
@@ -3130,6 +3240,77 @@ mod tests {
31303240
assert_eq!(merged_snippet.match_spans, vec![SearchMatchSpan { start: 14, end: 19 }]);
31313241
}
31323242

3243+
#[test]
fn merged_snippets_preserve_zero_based_end_exclusive_phrase_spans() {
    // Two adjacent snippets whose spans use 0-based, end-exclusive byte
    // offsets; merging must keep those offsets pointing at the same phrases.
    let first = SearchSnippet {
        start_line: 20,
        end_line: 22,
        match_line: 21,
        content_text: "line20\nseek failed for block\nline22".to_string(),
        match_spans: vec![SearchMatchSpan { start: 12, end: 28 }],
    };
    let second = SearchSnippet {
        start_line: 23,
        end_line: 24,
        match_line: 23,
        content_text: "write block with checksum\nline24".to_string(),
        match_spans: vec![SearchMatchSpan { start: 0, end: 5 }],
    };

    let merged = merge_overlapping_snippets(vec![first, second]);
    let snippet = &merged[0];

    let slice_of = |span: &SearchMatchSpan| &snippet.content_text[span.start..span.end];
    assert_eq!(slice_of(&snippet.match_spans[0]), "failed for block");
    assert_eq!(slice_of(&snippet.match_spans[1]), "write");
}
3272+
3273+
#[test]
fn parse_plain_highlight_pattern_round_trips_escaped_literals() {
    // Escaped parens must decode back to literal '(' and ')'.
    let parsed = parse_plain_highlight_pattern(r#"failed for block|pg_fatal\(\)"#)
        .expect("pattern should parse as plain literals");
    assert_eq!(parsed, ["failed for block", "pg_fatal()"]);
}
3282+
3283+
#[test]
fn parse_plain_highlight_pattern_rejects_regex_constructs() {
    // A genuine regex must not be mistaken for a plain literal list.
    assert_eq!(parse_plain_highlight_pattern("foo.*bar"), None);
}
3287+
3288+
#[test]
fn normalize_literal_match_spans_recomputes_shifted_plain_phrase() {
    // Stored span points at the wrong offsets; normalization should re-find
    // the literal phrase and replace the stale span.
    let text = r#"pg_fatal("seek failed for block %u", blockno);"#;
    let phrase = "failed for block";
    let stale = vec![SearchMatchSpan { start: 17, end: 33 }];

    let normalized = normalize_literal_match_spans(text, &stale, phrase, true);

    let start = text.find(phrase).expect("phrase should exist");
    assert_eq!(
        normalized,
        vec![SearchMatchSpan { start, end: start + phrase.len() }]
    );
}
3305+
3306+
#[test]
fn normalize_literal_match_spans_preserves_regex_patterns() {
    // A regex pattern cannot be recomputed literally, so the original spans
    // must come back untouched.
    let stale = vec![SearchMatchSpan { start: 5, end: 11 }];
    let normalized = normalize_literal_match_spans("abcde failed", &stale, "fail.*", true);
    assert_eq!(normalized, stale);
}
3313+
31333314
#[test]
31343315
fn multi_term_search_builds_intersect_filter() {
31353316
let request = TextSearchRequest::from_query_str("polly LinkAllPasses").unwrap();

src/pages/search.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,7 @@ fn format_indexed_timestamp(ts: &str) -> Option<String> {
928928
#[cfg(test)]
929929
mod tests {
930930
use super::*;
931+
use crate::db::models::SearchMatchSpan;
931932

932933
#[test]
933934
fn split_query_tokens_preserves_quoted_filters() {
@@ -968,6 +969,67 @@ mod tests {
968969
)
969970
);
970971
}
972+
973+
#[test]
fn segment_snippet_by_spans_highlights_exact_phrase() {
    let input = r#"pg_fatal("seek failed for block %u in file \"%s\": %m", blockno, fn);"#;
    let phrase = "failed for block";
    let start = input.find(phrase).expect("phrase should exist");

    let segments =
        segment_snippet_by_spans(input, &[SearchMatchSpan { start, end: start + phrase.len() }]);

    // Exactly the phrase must appear among the highlighted segments.
    let hit = segments
        .iter()
        .any(|(text, highlighted)| *highlighted && text == phrase);
    assert!(hit);
}
988+
989+
#[test]
fn segment_snippet_by_spans_uses_zero_based_end_exclusive_offsets() {
    // A span covering [0, len) must highlight the entire string as one segment.
    let input = "failed for block";
    let full_span = SearchMatchSpan { start: 0, end: input.len() };

    assert_eq!(
        segment_snippet_by_spans(input, &[full_span]),
        vec![(input.to_string(), true)]
    );
}
1002+
1003+
#[test]
fn segment_snippet_by_spans_rejects_non_char_boundary_spans() {
    // 'é' is two bytes in UTF-8, so start = 1 lands mid-character; the whole
    // snippet should come back unhighlighted instead of panicking.
    let input = "é failed";
    let bad_span = SearchMatchSpan { start: 1, end: 8 };

    assert_eq!(
        segment_snippet_by_spans(input, &[bad_span]),
        vec![(input.to_string(), false)]
    );
}
1013+
1014+
#[test]
fn segment_snippet_by_spans_handles_utf8_prefix_with_byte_offsets() {
    // Byte offsets computed on the multi-byte prefix must still split cleanly
    // on character boundaries.
    let input = "é failed";
    let needle = "failed";
    let start = input.find(needle).expect("phrase should exist");

    let segments =
        segment_snippet_by_spans(input, &[SearchMatchSpan { start, end: start + needle.len() }]);

    assert_eq!(
        segments,
        vec![("é ".to_string(), false), ("failed".to_string(), true)]
    );
}
9711033
}
9721034

9731035
fn submit_search<F>(navigate: &F, query_text: &RwSignal<String>, page: usize)

0 commit comments

Comments
 (0)