Skip to content

Commit eb0eb1a

Browse files
committed
mcp max byte request size and better symbol rank
1 parent cb4546a commit eb0eb1a

4 files changed

Lines changed: 188 additions & 24 deletions

File tree

src/db/postgres.rs

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -482,17 +482,31 @@ fn push_search_ctes<'a>(
482482
} else {
483483
qb.push(
484484
"
485+
candidate_content_hashes AS MATERIALIZED (
486+
SELECT DISTINCT sf.content_hash
487+
FROM scored_files sf
488+
),
489+
candidate_symbols AS MATERIALIZED (
490+
SELECT
491+
cch.content_hash,
492+
s.id AS symbol_id,
493+
s.name_lc
494+
FROM candidate_content_hashes cch
495+
JOIN symbols s
496+
ON s.content_hash = cch.content_hash
497+
),
485498
symbol_term_matches AS MATERIALIZED (
486499
SELECT
487500
term,
488-
us.name_lc
501+
cs.content_hash,
502+
cs.name_lc
489503
FROM UNNEST(",
490504
);
491505
qb.push_bind(symbol_terms);
492506
qb.push(
493507
") AS term
494-
JOIN unique_symbols us
495-
ON us.name_lc LIKE '%' || term || '%'
508+
JOIN candidate_symbols cs
509+
ON cs.name_lc LIKE '%' || term || '%'
496510
),
497511
symbol_scores AS (
498512
SELECT
@@ -507,8 +521,8 @@ fn push_search_ctes<'a>(
507521
END
508522
) AS score
509523
FROM symbol_term_matches stm
510-
JOIN symbols s ON s.name_lc = stm.name_lc
511-
JOIN scored_files sf ON sf.content_hash = s.content_hash
524+
JOIN scored_files sf
525+
ON sf.content_hash = stm.content_hash
512526
GROUP BY sf.file_id, sf.content_hash
513527
),
514528
definition_scores AS (
@@ -517,24 +531,24 @@ fn push_search_ctes<'a>(
517531
sf.content_hash,
518532
MAX(
519533
CASE
520-
WHEN s.name_lc = query_term.term THEN 2
534+
WHEN cs.name_lc = query_term.term THEN 2
521535
ELSE 1
522536
END
523537
)::INT AS definition_matches
524538
FROM scored_files sf
525-
JOIN symbols s
526-
ON s.content_hash = sf.content_hash
539+
JOIN candidate_symbols cs
540+
ON cs.content_hash = sf.content_hash
527541
JOIN UNNEST(
528542
",
529543
);
530544
qb.push_bind(definition_terms);
531545
qb.push(
532546
") AS query_term(term)
533-
ON s.name_lc = query_term.term
534-
OR s.name_lc LIKE query_term.term || '%'
547+
ON cs.name_lc = query_term.term
548+
OR cs.name_lc LIKE query_term.term || '%'
535549
JOIN symbol_references sr
536550
ON sr.kind = 'definition'
537-
AND sr.symbol_id = s.id
551+
AND sr.symbol_id = cs.symbol_id
538552
GROUP BY sf.file_id, sf.content_hash
539553
),
540554
top_files AS (
@@ -3653,10 +3667,12 @@ mod tests {
36533667
let request = TextSearchRequest::from_query_str("polly").unwrap();
36543668
let sql = build_phase1_sql(&request);
36553669

3670+
assert!(sql.contains("candidate_symbols AS MATERIALIZED"));
36563671
assert!(sql.contains("definition_scores AS"));
36573672
assert!(sql.contains("sr.kind = 'definition'"));
36583673
assert!(sql.contains("definition_matches"));
3659-
assert!(sql.contains("s.name_lc LIKE query_term.term || '%'"));
3674+
assert!(sql.contains("cs.name_lc LIKE query_term.term || '%'"));
3675+
assert!(!sql.contains("JOIN unique_symbols"));
36603676
}
36613677

36623678
#[test]

src/mcp/server.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,8 @@ fn mcp_tools() -> Vec<Value> {
423423
"enum": ["repo_path_line", "repo_path", "none"],
424424
"description": "Used when any_terms fanout is active."
425425
},
426-
"max_results_per_query": { "type": "integer", "minimum": 1, "maximum": 100 }
426+
"max_results_per_query": { "type": "integer", "minimum": 1, "maximum": 100 },
427+
"max_bytes": { "type": "integer", "minimum": 1, "maximum": 1048576, "description": "Optional cap on the serialized `results` payload in bytes. Results are truncated from the tail when this limit is reached." }
427428
},
428429
"examples": [
429430
{ "repo": "pointer", "branch": "main", "all_terms": ["search"] },

src/mcp/tools.rs

Lines changed: 156 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const MAX_AUTO_TOTAL_FILE_LIST_ENTRIES: usize = 5000;
2727

2828
pub async fn execute_search(payload: SearchToolRequest) -> Result<Value, String> {
2929
let request_echo = search_request_echo(&payload, None);
30+
let max_bytes = normalize_max_bytes(payload.max_bytes);
3031
let mode = build_search_execution_mode(&payload)?;
3132
match mode {
3233
SearchExecutionMode::Single { query, page } => {
@@ -36,14 +37,24 @@ pub async fn execute_search(payload: SearchToolRequest) -> Result<Value, String>
3637
request_echo,
3738
payload.repo.clone(),
3839
payload.branch.clone(),
40+
max_bytes,
3941
)
4042
.await
4143
}
4244
SearchExecutionMode::Batch {
4345
queries,
4446
dedupe,
4547
max_results_per_query,
46-
} => execute_batch_search(queries, dedupe, max_results_per_query, request_echo).await,
48+
} => {
49+
execute_batch_search(
50+
queries,
51+
dedupe,
52+
max_results_per_query,
53+
request_echo,
54+
max_bytes,
55+
)
56+
.await
57+
}
4758
}
4859
}
4960

@@ -671,6 +682,7 @@ async fn execute_single_search(
671682
request_echo: Value,
672683
repo: Option<String>,
673684
branch: Option<String>,
685+
max_bytes: Option<usize>,
674686
) -> Result<Value, String> {
675687
tracing::trace!(
676688
target: "pointer::mcp_search",
@@ -692,29 +704,45 @@ async fn execute_single_search(
692704
}
693705

694706
let enriched_results = enrich_results(&page_data.results);
707+
let truncation_state = truncate_results_by_max_bytes(enriched_results, max_bytes)?;
695708
let top_filetypes = compute_top_filetypes(&page_data.results);
696709
let mut guidance = Vec::new();
697-
if page_data.results.is_empty() {
710+
if truncation_state.results.is_empty() {
711+
if page_data.results.is_empty() {
712+
guidance.extend(build_no_result_guidance());
713+
} else {
714+
guidance.push(
715+
"max_bytes is smaller than the first result payload. Increase max_bytes or narrow the search."
716+
.to_string(),
717+
);
718+
}
719+
} else if page_data.results.is_empty() {
698720
guidance.extend(build_no_result_guidance());
721+
} else if truncation_state.truncated_by_max_bytes {
722+
guidance.push(
723+
"Results were truncated to satisfy max_bytes. Increase max_bytes or narrow the search to inspect omitted matches."
724+
.to_string(),
725+
);
699726
} else if page_data.has_more {
700727
guidance.push(
701728
"Results are truncated for this page. Reuse truncation.next_page_args as the next structured search call."
702729
.to_string(),
703730
);
704731
}
705-
let next_page_args = if page_data.has_more {
732+
let next_page_args = if page_data.has_more && !truncation_state.truncated_by_max_bytes {
706733
search_request_echo_from_value(&request_echo, page + 1)
707734
} else {
708735
Value::Null
709736
};
737+
let has_more = page_data.has_more || truncation_state.truncated_by_max_bytes;
710738

711739
Ok(json!({
712740
"mode": "single",
713741
"request_echo": request_echo,
714742
"page": page_data.page,
715743
"page_size": page_data.page_size,
716-
"has_more": page_data.has_more,
717-
"results": enriched_results,
744+
"has_more": has_more,
745+
"results": truncation_state.results,
718746
"stats": page_data.stats,
719747
"facets": {
720748
"common_directories": page_data.stats.common_directories,
@@ -724,8 +752,11 @@ async fn execute_single_search(
724752
},
725753
"index_freshness": freshness,
726754
"truncation": {
727-
"has_more": page_data.has_more,
755+
"has_more": has_more,
728756
"next_page_args": next_page_args,
757+
"max_bytes": truncation_state.max_bytes,
758+
"returned_bytes": truncation_state.returned_bytes,
759+
"truncated_by_max_bytes": truncation_state.truncated_by_max_bytes,
729760
},
730761
"guidance": guidance,
731762
}))
@@ -736,6 +767,7 @@ async fn execute_batch_search(
736767
dedupe: SearchDedupeMode,
737768
max_results_per_query: usize,
738769
request_echo: Value,
770+
max_bytes: Option<usize>,
739771
) -> Result<Value, String> {
740772
tracing::trace!(
741773
target: "pointer::mcp_search",
@@ -775,7 +807,29 @@ async fn execute_batch_search(
775807
let deduped_results = dedupe_results(all_results, dedupe.clone());
776808
let freshness = freshness_from_search_results(&deduped_results);
777809
let top_filetypes = compute_top_filetypes(&deduped_results);
778-
let guidance = if deduped_results.is_empty() {
810+
let truncation_state =
811+
truncate_results_by_max_bytes(enrich_results(&deduped_results), max_bytes)?;
812+
let guidance = if truncation_state.results.is_empty() {
813+
if deduped_results.is_empty() {
814+
vec![
815+
"No matches found in this batch. Broaden terms or remove restrictive filters."
816+
.to_string(),
817+
"For OR semantics, keep separate alternatives in any_terms and inspect per_query_counts."
818+
.to_string(),
819+
"For older snapshots, include historical:true and rerun per branch.".to_string(),
820+
]
821+
} else {
822+
vec![
823+
"max_bytes is smaller than the first result payload. Increase max_bytes or narrow the search."
824+
.to_string(),
825+
]
826+
}
827+
} else if truncation_state.truncated_by_max_bytes {
828+
vec![
829+
"Batch results were truncated to satisfy max_bytes. Increase max_bytes or split any_terms into smaller calls."
830+
.to_string(),
831+
]
832+
} else if deduped_results.is_empty() {
779833
vec![
780834
"No matches found in this batch. Broaden terms or remove restrictive filters."
781835
.to_string(),
@@ -786,29 +840,92 @@ async fn execute_batch_search(
786840
} else {
787841
Vec::new()
788842
};
843+
let has_more = any_has_more || truncation_state.truncated_by_max_bytes;
789844

790845
Ok(json!({
791846
"mode": "batch",
792847
"request_echo": request_echo,
793848
"dedupe": dedupe,
794-
"results": enrich_results(&deduped_results),
849+
"results": truncation_state.results,
795850
"facets": {
796851
"top_filetypes": top_filetypes,
797852
},
798853
"index_freshness": freshness,
799854
"batch": {
800855
"per_query_counts": per_query_counts,
801856
"deduped_count": deduped_results.len(),
802-
"truncated": any_has_more,
857+
"truncated": has_more,
803858
},
804859
"truncation": {
805-
"has_more": any_has_more,
806-
"next_step_hint": if any_has_more { "Run single-query search with page>1 for the query of interest." } else { "" }
860+
"has_more": has_more,
861+
"next_step_hint": if truncation_state.truncated_by_max_bytes {
862+
"Increase max_bytes or split any_terms into smaller calls to inspect omitted results."
863+
} else if any_has_more {
864+
"Run single-query search with page>1 for the query of interest."
865+
} else {
866+
""
867+
},
868+
"max_bytes": truncation_state.max_bytes,
869+
"returned_bytes": truncation_state.returned_bytes,
870+
"truncated_by_max_bytes": truncation_state.truncated_by_max_bytes
807871
},
808872
"guidance": guidance,
809873
}))
810874
}
811875

876+
/// Outcome of applying an optional `max_bytes` cap to a list of serialized
/// search results (see `truncate_results_by_max_bytes`).
#[derive(Debug)]
struct ResultsByteTruncation {
    // The (possibly truncated) prefix of results that fit within the cap.
    results: Vec<Value>,
    // The cap that was applied, or `None` when no cap was requested.
    max_bytes: Option<usize>,
    // Serialized size in bytes of `results` rendered as a JSON array,
    // including the enclosing brackets and element separators.
    returned_bytes: usize,
    // True when at least one result was dropped to satisfy the cap.
    truncated_by_max_bytes: bool,
}
883+
884+
/// Converts the wire-level optional `max_bytes` (u32, from the MCP request)
/// into the internal `usize` form, clamping zero to 1 so that any requested
/// cap is always positive.
fn normalize_max_bytes(max_bytes: Option<u32>) -> Option<usize> {
    match max_bytes {
        None => None,
        Some(raw) => {
            // A zero cap would be meaningless; treat it as the minimum of 1.
            let clamped = if raw == 0 { 1 } else { raw };
            Some(clamped as usize)
        }
    }
}
887+
888+
fn truncate_results_by_max_bytes(
889+
results: Vec<Value>,
890+
max_bytes: Option<usize>,
891+
) -> Result<ResultsByteTruncation, String> {
892+
let Some(max_bytes) = max_bytes else {
893+
let returned_bytes = serde_json::to_vec(&results)
894+
.map(|bytes| bytes.len())
895+
.map_err(|err| err.to_string())?;
896+
return Ok(ResultsByteTruncation {
897+
results,
898+
max_bytes: None,
899+
returned_bytes,
900+
truncated_by_max_bytes: false,
901+
});
902+
};
903+
904+
let mut kept = Vec::with_capacity(results.len());
905+
let mut used_bytes = 2usize;
906+
let mut truncated = false;
907+
908+
for result in results {
909+
let result_bytes = serde_json::to_vec(&result)
910+
.map(|bytes| bytes.len())
911+
.map_err(|err| err.to_string())?;
912+
let separator_bytes = usize::from(!kept.is_empty());
913+
if used_bytes + separator_bytes + result_bytes > max_bytes {
914+
truncated = true;
915+
break;
916+
}
917+
used_bytes += separator_bytes + result_bytes;
918+
kept.push(result);
919+
}
920+
921+
Ok(ResultsByteTruncation {
922+
results: kept,
923+
max_bytes: Some(max_bytes),
924+
returned_bytes: used_bytes,
925+
truncated_by_max_bytes: truncated,
926+
})
927+
}
928+
812929
fn dedupe_results(results: Vec<SearchResult>, dedupe: SearchDedupeMode) -> Vec<SearchResult> {
813930
if dedupe == SearchDedupeMode::None {
814931
return results;
@@ -966,6 +1083,9 @@ fn search_request_echo(payload: &SearchToolRequest, page_override: Option<u32>)
9661083
);
9671084
}
9681085
}
1086+
if let Some(max_bytes) = payload.max_bytes {
1087+
out.insert("max_bytes".to_string(), json!(max_bytes.max(1)));
1088+
}
9691089

9701090
let page = page_override.unwrap_or_else(|| payload.page.max(1));
9711091
if page > 1 {
@@ -1332,6 +1452,7 @@ mod tests {
13321452
page: 1,
13331453
dedupe: SearchDedupeMode::RepoPath,
13341454
max_results_per_query: 25,
1455+
max_bytes: Some(4096),
13351456
};
13361457

13371458
let echo = search_request_echo(&payload, Some(2));
@@ -1341,10 +1462,34 @@ mod tests {
13411462
assert_eq!(echo["regex"], "\\bQueryParser\\(");
13421463
assert_eq!(echo["all_terms"], json!(["symbol", "resolver"]));
13431464
assert_eq!(echo["any_terms"], json!(["panic", "unwrap"]));
1465+
assert_eq!(echo["max_bytes"], 4096);
13441466
assert_eq!(echo["page"], 2);
13451467
assert!(echo.get("query").is_none());
13461468
}
13471469

1470+
#[test]
fn truncate_results_by_max_bytes_keeps_prefix_within_limit() {
    // Two ~40-byte results against a 64-byte cap: only the first fits.
    let first = json!({ "content_text": "a".repeat(32) });
    let second = json!({ "content_text": "b".repeat(32) });

    let outcome =
        truncate_results_by_max_bytes(vec![first, second], Some(64)).expect("truncate");

    assert_eq!(outcome.results.len(), 1);
    assert!(outcome.truncated_by_max_bytes);
    assert!(outcome.returned_bytes <= 64);
}
1482+
1483+
#[test]
fn truncate_results_by_max_bytes_can_drop_all_results() {
    // An 8-byte cap cannot hold even one ~40-byte result.
    let only = json!({ "content_text": "a".repeat(32) });

    let outcome = truncate_results_by_max_bytes(vec![only], Some(8)).expect("truncate");

    assert!(outcome.results.is_empty());
    assert!(outcome.truncated_by_max_bytes);
    // Only the empty-array brackets `[]` are counted.
    assert_eq!(outcome.returned_bytes, 2);
}
1492+
13481493
#[test]
13491494
fn normalize_repo_path_collapses_slashes_and_trims() {
13501495
assert_eq!(normalize_repo_path("//src///mcp//".to_string()), "src/mcp");

0 commit comments

Comments
 (0)