Skip to content

Commit 6644107

Browse files
committed
Improve symbol_insights and path_search
Better instructions, clearer functionality, and a better tool schema.
1 parent bb3099b commit 6644107

4 files changed

Lines changed: 245 additions & 26 deletions

File tree

src/mcp/server.rs

Lines changed: 26 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ fn mcp_docs_payload() -> Value {
9090
"or_support": "Use any_terms:[...] for OR semantics. all_terms are ANDed.",
9191
"wildcards": "Use path/file as glob-like filters. all_terms and any_terms are literal terms, not wildcard patterns.",
9292
"regex": "Use the regex field for content regex matching. Provide only the pattern string and JSON-escape backslashes.",
93-
"path_search_behavior": "path_search requires a non-empty plain fuzzy query and is for fuzzy path matching only.",
93+
"path_search_behavior": "path_search requires a non-empty plain query string and performs case-insensitive substring matching on paths only.",
9494
"file_list_behavior": "file_list enumerates directories and files with optional recursive depth and limit. Use path as a directory prefix, not a search query.",
9595
"file_content_behavior": "file_content supports optional start_line/end_line (1-based, inclusive) to return snippets instead of full files.",
9696
"recency_workflow": "For recent or older change questions: repositories -> repo_branches -> search by branch and compare indexed_at or is_live.",
@@ -116,7 +116,7 @@ fn mcp_docs_payload() -> Value {
116116
"No branch results: call repo_branches and use the exact branch name.",
117117
"Need OR behavior: place alternatives in any_terms:[\"termA\",\"termB\"].",
118118
"Need regex matching: set the regex field instead of using wildcard plain terms.",
119-
"Need directory listing: use file_list. Need fuzzy path lookup: use path_search."
119+
"Need directory listing: use file_list. Need case-insensitive substring path lookup: use path_search."
120120
]
121121
},
122122
"cookbook": [
@@ -126,7 +126,7 @@ fn mcp_docs_payload() -> Value {
126126
"4) search({repo, branch, historical:true, all_terms:[\"term\"]}) for older snapshots",
127127
"5) search({repo, regex:\"\\\\bQueryParser\\\\(\"}) for regex matching",
128128
"6) file_list({repo, branch, path:\"src/mcp\", depth:2}) for directory enumeration",
129-
"7) path_search({repo, branch, query:\"mcp serv\"}) for fuzzy path lookup",
129+
"7) path_search({repo, branch, query:\"mcp_serv\"}) for case-insensitive substring path lookup",
130130
"8) file_content({repo, branch, path, start_line?, end_line?}) for raw source text or snippets",
131131
"9) For large files, prefer file_content with line snippets first, then expand only if needed",
132132
"10) symbol_insights({params:{...}}) for definitions and references",
@@ -199,7 +199,7 @@ async fn mcp_rpc(Json(req): Json<JsonRpcRequest>) -> Response {
199199
"name": "pointer-mcp",
200200
"version": env!("CARGO_PKG_VERSION"),
201201
},
202-
"instructions": "Use tools to query indexed code and symbol information; do not fall back to local filesystem reads for indexed lookup. Operational flow: repositories -> repo_branches -> file_list/path_search -> file_content/search/symbol_insights. search accepts structured JSON fields only; do not send a free-form `query` string. Keep filter values plain: do not include prefixes like `repo:`, `path:`, or `regex:` inside field values. all_terms are AND semantics and any_terms are OR semantics (fanout + dedupe). For recency/version questions like 'recent change', call repo_branches first, then run search with explicit branch values and compare indexed_at/is_live metadata; add historical:true when historical snapshots should be included. Plain terms do not support wildcard matching; use the regex field for pattern matching. path_search requires a non-empty plain fuzzy query and is not a directory listing endpoint; use file_list for enumeration. For large files, call file_content with start_line/end_line first to limit context size.",
202+
"instructions": "Use tools to query indexed code and symbol information; do not fall back to local filesystem reads for indexed lookup. Operational flow: repositories -> repo_branches -> file_list/path_search -> file_content/search/symbol_insights. search accepts structured JSON fields only; do not send a free-form `query` string. Keep filter values plain: do not include prefixes like `repo:`, `path:`, or `regex:` inside field values. all_terms are AND semantics and any_terms are OR semantics (fanout + dedupe). For recency/version questions like 'recent change', call repo_branches first, then run search with explicit branch values and compare indexed_at/is_live metadata; add historical:true when historical snapshots should be included. Plain terms do not support wildcard matching; use the regex field for pattern matching. path_search requires a non-empty plain query string and performs case-insensitive substring matching over paths; it is not a directory listing endpoint, so use file_list for enumeration. For large files, call file_content with start_line/end_line first to limit context size.",
203203
});
204204
jsonrpc_result(req.id, result)
205205
}
@@ -338,7 +338,7 @@ fn mcp_tools() -> Vec<Value> {
338338
"repo": { "type": "string", "description": "Exact repository key from repositories. Example: \"pointer\"." },
339339
"branch": { "type": "string", "description": "Exact branch name from repo_branches. Example: \"main\"." },
340340
"lang": { "type": "string", "description": "Language filter. Example: \"rust\"." },
341-
"path": { "type": "string", "description": "Glob-like path filter only. Example: \"src/mcp/**\". Do not use this for fuzzy lookup." },
341+
"path": { "type": "string", "description": "Glob-like path filter only. Example: \"src/mcp/**\". Do not use this for path substring lookup." },
342342
"file": { "type": "string", "description": "Glob-like filename/path filter. Example: \"*.rs\"." },
343343
"regex": { "type": "string", "description": "Content regex pattern only. Do not prefix with `regex:`. JSON-escape backslashes, for example \"\\\\bQueryParser\\\\(\"." },
344344
"case": { "type": "string", "enum": ["yes", "no", "auto"] },
@@ -404,36 +404,48 @@ fn mcp_tools() -> Vec<Value> {
404404
}),
405405
json!({
406406
"name": "file_list",
407-
"description": "Enumerate files/directories under a path for a repository+branch from the index. Supports bounded recursive traversal with depth and limit. Use this for directory listing workflows and then call file_content on specific files. `path` is a directory prefix, not a fuzzy search query. Response includes truncated flag, branch freshness, and stable paths.",
407+
"description": "Enumerate files/directories under a path for a repository+branch from the index. Supports bounded recursive traversal with depth and limit. Use this for directory listing workflows and then call file_content on specific files. `path` is a directory prefix, not a substring search query. Optional cursor/auto_paginate can fetch more results while preserving truncation and narrowing guidance. Response includes truncated flag, branch freshness, and stable paths.",
408408
"inputSchema": {
409409
"type": "object",
410410
"properties": {
411411
"repo": { "type": "string" },
412412
"branch": { "type": "string" },
413-
"path": { "type": "string", "description": "Directory prefix to enumerate from. Example: \"src/mcp\". Do not send a fuzzy query here." },
413+
"path": { "type": "string", "description": "Directory prefix to enumerate from. Example: \"src/mcp\". Do not send a substring search query here." },
414414
"depth": { "type": "integer", "minimum": 1, "maximum": 10 },
415-
"limit": { "type": "integer", "minimum": 1, "maximum": 5000 }
415+
"limit": { "type": "integer", "minimum": 1, "maximum": 5000 },
416+
"cursor": { "type": "string", "description": "Optional pagination cursor from prior file_list response." },
417+
"auto_paginate": { "type": "boolean", "description": "If true, server attempts to return multiple pages up to max_pages/max_total_entries." },
418+
"max_pages": { "type": "integer", "minimum": 1, "maximum": 5 },
419+
"max_total_entries": { "type": "integer", "minimum": 1, "maximum": 5000 }
416420
},
417421
"examples": [
418-
{ "repo": "pointer", "branch": "main", "path": "src/mcp", "depth": 2, "limit": 100 }
422+
{ "repo": "pointer", "branch": "main", "path": "src/mcp", "depth": 2, "limit": 100 },
423+
{ "repo": "pointer", "branch": "main", "path": "src", "limit": 100, "cursor": "100" },
424+
{ "repo": "pointer", "branch": "main", "path": "src", "limit": 100, "auto_paginate": true, "max_pages": 3, "max_total_entries": 250 }
419425
],
420426
"required": ["repo", "branch"],
421427
"additionalProperties": false
422428
}
423429
}),
424430
json!({
425431
"name": "path_search",
426-
"description": "Search file and directory paths within a repository and branch using a non-empty plain fuzzy query (fuzzy path lookup). This is path-only matching and does not enumerate full directory contents; use file_list for enumeration and file_content for file bodies. Do not send filter syntax like `path:` or glob patterns here. Includes freshness metadata.",
432+
"description": "Search file and directory paths within a repository and branch using a non-empty plain query string. Matching is case-insensitive substring matching on path text (single query string, not OR-token search). This is path-only matching and does not enumerate full directory contents; use file_list for enumeration and file_content for file bodies. Do not send filter syntax like `path:` or glob patterns here. Optional cursor/auto_paginate can fetch additional path matches. Includes freshness metadata.",
427433
"inputSchema": {
428434
"type": "object",
429435
"properties": {
430436
"repo": { "type": "string" },
431437
"branch": { "type": "string" },
432-
"query": { "type": "string", "description": "Plain fuzzy text only. Example: \"mcp serv\". Do not send `path:src/mcp` or glob syntax here." },
433-
"limit": { "type": "integer", "minimum": 1, "maximum": 50 }
438+
"query": { "type": "string", "description": "Plain query string only. Example: \"mcp_serv\". Matching is case-insensitive substring matching over path text. This is a single query string, not OR token search. Do not send `path:src/mcp` or glob syntax here." },
439+
"limit": { "type": "integer", "minimum": 1, "maximum": 50 },
440+
"cursor": { "type": "string", "description": "Optional pagination cursor from prior path_search response." },
441+
"auto_paginate": { "type": "boolean", "description": "If true, server attempts to return multiple pages up to max_pages/max_total_entries." },
442+
"max_pages": { "type": "integer", "minimum": 1, "maximum": 5 },
443+
"max_total_entries": { "type": "integer", "minimum": 1, "maximum": 200 }
434444
},
435445
"examples": [
436-
{ "repo": "pointer", "branch": "main", "query": "mcp serv", "limit": 10 }
446+
{ "repo": "pointer", "branch": "main", "query": "mcp_serv", "limit": 10 },
447+
{ "repo": "pointer", "branch": "main", "query": "mcp_serv", "limit": 10, "cursor": "10" },
448+
{ "repo": "pointer", "branch": "main", "query": "mcp_serv", "limit": 10, "auto_paginate": true, "max_pages": 3, "max_total_entries": 25 }
437449
],
438450
"required": ["repo", "branch", "query"],
439451
"additionalProperties": false
@@ -455,6 +467,7 @@ fn mcp_tools() -> Vec<Value> {
455467
"language": { "type": "string" },
456468
"scope": {
457469
"type": "string",
470+
"description": "Scope selector. Accepted values are case-insensitive.",
458471
"enum": ["repository", "directory", "file", "custom"]
459472
},
460473
"include_paths": {

src/mcp/tools.rs

Lines changed: 116 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@ use crate::services::search_service::search;
2121
const MAX_BATCH_QUERIES: usize = 8;
2222
const FILE_LIST_QUERY_TIMEOUT: Duration = Duration::from_secs(8);
2323
const FILE_LIST_MAX_SOURCE_ROWS: usize = 200_000;
24+
const MAX_AUTO_PAGES: u8 = 5;
25+
const MAX_AUTO_TOTAL_PATH_ENTRIES: usize = 200;
26+
const MAX_AUTO_TOTAL_FILE_LIST_ENTRIES: usize = 5000;
2427

2528
pub async fn execute_search(payload: SearchToolRequest) -> Result<Value, String> {
2629
let request_echo = search_request_echo(&payload, None);
@@ -182,6 +185,14 @@ pub async fn execute_file_list(
182185
let root_path = normalize_repo_path(payload.path.unwrap_or_default());
183186
let requested_depth = payload.depth.clamp(1, 10);
184187
let limit = payload.limit.clamp(1, 5000);
188+
let offset = parse_cursor_offset(payload.cursor.as_deref())?;
189+
let auto_paginate = payload.auto_paginate.unwrap_or(false);
190+
let max_pages = payload.max_pages.unwrap_or(1).clamp(1, MAX_AUTO_PAGES);
191+
let max_total_entries = payload
192+
.max_total_entries
193+
.unwrap_or(limit)
194+
.clamp(limit, MAX_AUTO_TOTAL_FILE_LIST_ENTRIES)
195+
.min(limit.saturating_mul(max_pages as usize));
185196

186197
let like_pattern = if root_path.is_empty() {
187198
"%".to_string()
@@ -225,11 +236,51 @@ pub async fn execute_file_list(
225236
}
226237

227238
let mut entries = build_file_list_entries(&source_rows, &root_path, requested_depth);
228-
if entries.len() > limit {
229-
entries.truncate(limit);
239+
let total_entries = entries.len();
240+
241+
if auto_paginate {
242+
let slice_start = offset.min(total_entries);
243+
let slice_len = max_total_entries.min(total_entries.saturating_sub(slice_start));
244+
entries = entries
245+
.into_iter()
246+
.skip(slice_start)
247+
.take(slice_len)
248+
.collect();
249+
} else {
250+
let slice_start = offset.min(total_entries);
251+
let slice_len = limit.min(total_entries.saturating_sub(slice_start));
252+
entries = entries
253+
.into_iter()
254+
.skip(slice_start)
255+
.take(slice_len)
256+
.collect();
257+
}
258+
259+
let visible_limit = if auto_paginate {
260+
max_total_entries
261+
} else {
262+
limit
263+
};
264+
let has_more = offset.saturating_add(visible_limit) < total_entries;
265+
let next_cursor = has_more.then(|| (offset + entries.len()).to_string());
266+
let pages_fetched = if auto_paginate {
267+
let per_page = limit.max(1);
268+
let fetched = entries.len().div_ceil(per_page).max(1) as u8;
269+
Some(fetched.min(max_pages))
270+
} else {
271+
None
272+
};
273+
274+
if total_entries > limit {
230275
truncated = true;
231276
if truncated_reason.is_none() {
232-
truncated_reason = Some("file_list reached entry limit".to_string());
277+
let mut reason = "file_list reached entry limit".to_string();
278+
if has_more {
279+
reason.push_str("; narrow path/depth or continue with cursor");
280+
} else {
281+
reason.push_str("; narrow path/depth for fewer results");
282+
}
283+
truncated_reason = Some(reason);
233284
}
234285
}
235286

@@ -244,6 +295,9 @@ pub async fn execute_file_list(
244295
requested_depth,
245296
truncated,
246297
truncated_reason,
298+
has_more,
299+
next_cursor,
300+
pages_fetched,
247301
entries,
248302
index_freshness,
249303
})
@@ -257,24 +311,76 @@ pub async fn execute_path_search(
257311
}
258312
if looks_like_search_filter_query(&payload.query) {
259313
return Err(
260-
"path_search query must be plain fuzzy text, not filter syntax such as repo: or path:"
314+
"path_search query must be a plain query string, not filter syntax such as repo: or path:"
261315
.to_string(),
262316
);
263317
}
264318
let repo = payload.repo.clone();
265319
let branch = payload.branch.clone();
266-
let entries = search_repo_paths(repo.clone(), branch.clone(), payload.query, payload.limit)
267-
.await
268-
.map_err(|err| err.to_string())?;
320+
let limit = payload.limit.unwrap_or(10).clamp(1, 50) as usize;
321+
let offset = parse_cursor_offset(payload.cursor.as_deref())?;
322+
let auto_paginate = payload.auto_paginate.unwrap_or(false);
323+
let max_pages = payload.max_pages.unwrap_or(1).clamp(1, MAX_AUTO_PAGES);
324+
let max_total_entries = (payload
325+
.max_total_entries
326+
.unwrap_or(limit as u16)
327+
.clamp(limit as u16, MAX_AUTO_TOTAL_PATH_ENTRIES as u16)
328+
as usize)
329+
.min(limit.saturating_mul(max_pages as usize));
330+
331+
let target_count = if auto_paginate {
332+
max_total_entries
333+
} else {
334+
limit
335+
};
336+
let fetch_limit = (offset + target_count + 1).min(MAX_AUTO_TOTAL_PATH_ENTRIES + 1);
337+
let fetched = search_repo_paths(
338+
repo.clone(),
339+
branch.clone(),
340+
payload.query,
341+
Some(fetch_limit as u16),
342+
)
343+
.await
344+
.map_err(|err| err.to_string())?;
345+
let has_more = fetched.len() > offset + target_count;
346+
let entries: Vec<_> = fetched
347+
.into_iter()
348+
.skip(offset)
349+
.take(target_count)
350+
.collect();
351+
let next_cursor = has_more.then(|| (offset + entries.len()).to_string());
352+
let pages_fetched = if auto_paginate {
353+
let fetched_pages = entries.len().div_ceil(limit).max(1) as u8;
354+
Some(fetched_pages.min(max_pages))
355+
} else {
356+
None
357+
};
358+
269359
let index_freshness = resolve_branch_freshness(&repo, &branch, None)
270360
.await
271361
.unwrap_or_else(|_| unknown_freshness());
272362
Ok(PathSearchToolResponse {
273363
entries,
364+
has_more,
365+
next_cursor,
366+
pages_fetched,
274367
index_freshness,
275368
})
276369
}
277370

371+
/// Converts an optional pagination cursor string into a numeric offset.
///
/// A missing cursor (`None`), or one that is empty after trimming
/// surrounding whitespace, yields `Ok(0)`. Any other value must parse as a
/// `usize` after trimming.
///
/// # Errors
///
/// Returns `"invalid cursor; expected a numeric offset"` when the trimmed
/// cursor cannot be parsed as a `usize`.
fn parse_cursor_offset(cursor: Option<&str>) -> Result<usize, String> {
372+
let Some(cursor) = cursor else {
373+
return Ok(0);
374+
};
375+
let trimmed = cursor.trim();
376+
if trimmed.is_empty() {
377+
return Ok(0);
378+
}
379+
trimmed
380+
.parse::<usize>()
381+
.map_err(|_| "invalid cursor; expected a numeric offset".to_string())
382+
}
383+
278384
pub async fn execute_symbol_insights(
279385
payload: SymbolInsightsToolRequest,
280386
) -> Result<SymbolInsightsToolResponse, String> {
@@ -322,7 +428,7 @@ pub fn normalize_tool_error(tool: &str, err: String) -> (String, String, Option<
322428
"file_list_invalid_params".to_string(),
323429
"file_list does not accept `query`".to_string(),
324430
Some(
325-
"Use `path` as an exact directory prefix for file_list, or call path_search with a plain fuzzy `query`."
431+
"Use `path` as an exact directory prefix for file_list, or call path_search with a plain query string (case-insensitive substring matching)."
326432
.to_string(),
327433
),
328434
);
@@ -365,12 +471,12 @@ pub fn normalize_tool_error(tool: &str, err: String) -> (String, String, Option<
365471
),
366472
);
367473
}
368-
if tool == "path_search" && lower.contains("plain fuzzy text") {
474+
if tool == "path_search" && lower.contains("plain query string") {
369475
return (
370476
"path_search_invalid_query_syntax".to_string(),
371477
err,
372478
Some(
373-
"Use plain fuzzy text like `mcp serv` for path_search. Use file_list.path for directory prefixes and search.path/search.file for filters."
479+
"Use a plain query string like `mcp_serv` for path_search (case-insensitive substring matching). Use file_list.path for directory prefixes and search.path/search.file for filters."
374480
.to_string(),
375481
),
376482
);

0 commit comments

Comments
 (0)