Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ The following tools are available but disabled by default. To enable them, see t
- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
- `index` (required): The name of the index to search in
- `query` (required): The search query in OpenSearch Query DSL format
- `size` (optional): Maximum number of hits to return (default: 10, max: 100). Limits response size to prevent token overflow
- `from` (optional): Starting offset for pagination (default: 0). Use together with `size` to page through results

- **GetShardsTool**
- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
Expand Down Expand Up @@ -115,6 +117,7 @@ The following tools are available but disabled by default. To enable them, see t

- `opensearch_url` (optional): The OpenSearch cluster URL to connect to
- `index` (optional): Limit the information returned to the specified indices. If not provided, returns segments for all indices
- `limit` (optional): Maximum number of segments to return (default: 1000). Limits response size to prevent token overflow

- **CatNodesTool**

Expand Down
37 changes: 30 additions & 7 deletions src/opensearch/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,28 @@ def get_index_mapping(args: GetIndexMappingArgs) -> json:


def search_index(args: SearchIndexArgs) -> json:
    """Search an index with pagination support.

    The query body is copied before ``size`` and ``from`` are written into it,
    so the caller's ``args.query`` is left untouched. The pagination values
    from ``args`` override any ``size``/``from`` already present in the query.

    Args:
        args: SearchIndexArgs containing index, query, and optional pagination params

    Returns:
        json: Search results from OpenSearch

    Raises:
        ValueError: If the query is neither a dict nor a JSON string that
            decodes to an object.
    """
    from .client import initialize_client

    # NOTE(review): a reviewer asked for this cap to be configurable (CLI
    # parameter or config file), keeping 100 as the default when it is unset.
    max_size = 100
    default_size = 10

    query = args.query
    if isinstance(query, str):
        # Query DSL may arrive as a JSON string; decode it instead of silently
        # discarding it (which would turn the request into a match-all search).
        query = json.loads(query) if query.strip() else None

    if query is None:
        query_body = {}
    elif isinstance(query, dict):
        # Shallow copy so the pagination keys are not written into args.query
        query_body = dict(query)
    else:
        raise ValueError(
            f'query must be a JSON object in OpenSearch Query DSL format, got {type(query).__name__}'
        )

    # Apply pagination parameters (override any user-provided values).
    # Size is capped to prevent token overflow.
    requested_size = args.size or default_size
    query_body['size'] = min(requested_size, max_size)
    query_body['from'] = args.from_ if args.from_ is not None else 0

    client = initialize_client(args)
    return client.search(index=args.index, body=query_body)


Expand All @@ -62,21 +80,26 @@ def get_shards(args: GetShardsArgs) -> json:

def get_segments(args: GetSegmentsArgs) -> json:
    """Return Lucene segment information from the cat segments API.

    Args:
        args: GetSegmentsArgs containing the optional index filter and the
            optional limit on the number of segments returned

    Returns:
        json: Segment information for the specified indices or all indices.
            When a limit is set and the response is a list, only the first
            ``args.limit`` entries are returned.
    """
    from .client import initialize_client

    client = initialize_client(args)

    # An empty or missing index is passed as None, i.e. no index filter
    segments = client.cat.segments(index=args.index or None, format='json')

    # NOTE(review): a reviewer suggested telling the MCP client in the response
    # when the result has been truncated; the same applies to search_index.
    if not args.limit or not isinstance(segments, list):
        return segments

    # Apply limit to prevent token overflow
    return segments[: args.limit]


Expand Down
26 changes: 23 additions & 3 deletions src/tools/tool_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ class GetIndexMappingArgs(baseToolArgs):
class SearchIndexArgs(baseToolArgs):
    # Target index and the Query DSL body to run against it.
    index: str = Field(description='The name of the index to search in')
    query: Any = Field(description='The search query in OpenSearch query DSL format')

    # Page size. Validation only enforces the lower bound (>= 1); the
    # description tells callers that values above 100 are capped.
    size: Optional[int] = Field(
        ge=1,
        default=10,
        description=(
            'Maximum number of hits to return (default: 10, max: 100). '
            'Limits response size to prevent token overflow. '
            'Values exceeding 100 will be capped at 100.'
        ),
    )

    # Page offset. `from` is a Python keyword, so the attribute is `from_`
    # and the external name `from` is provided through the aliases.
    from_: Optional[int] = Field(
        ge=0,
        default=0,
        alias='from',
        serialization_alias='from',
        description=(
            'Starting offset for pagination (default: 0). '
            'Use with size for pagination.'
        ),
    )

    class Config:
        # Accept the field name (`from_`) as well as the alias (`from`) on input
        populate_by_name = True


class GetShardsArgs(baseToolArgs):
Expand Down Expand Up @@ -65,12 +80,17 @@ class Config:

class GetSegmentsArgs(baseToolArgs):
"""Arguments for the GetSegmentsTool."""

index: Optional[str] = Field(
default=None,
default=None,
description='Limit the information returned to the specified indices. If not provided, returns segments for all indices.'
)

limit: Optional[int] = Field(
default=1000,
description='Maximum number of segments to return (default: 1000). Limits response size to prevent token overflow.',
ge=1,
)

class Config:
json_schema_extra = {
"examples": [
Expand Down
4 changes: 2 additions & 2 deletions src/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
},
'SearchIndexTool': {
'display_name': 'SearchIndexTool',
'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch',
'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch. Supports pagination with size (default: 10, max: 100) and from parameters to limit response size and prevent token overflow.',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new information about the parameters is sent to the LLM as the parameter description. Maybe we can limit the tool description to just its function and not describe its parameters?

'input_schema': SearchIndexArgs.model_json_schema(),
'function': search_index_tool,
'args_model': SearchIndexArgs,
Expand All @@ -524,7 +524,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
},
'GetSegmentsTool': {
'display_name': 'GetSegmentsTool',
'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices.',
'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices. Supports limit parameter (default: 1000) to prevent token overflow.',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same concern as above

'input_schema': GetSegmentsArgs.model_json_schema(),
'function': get_segments_tool,
'args_model': GetSegmentsArgs,
Expand Down
Loading
Loading