feat: Add pagination support to SearchIndexTool and GetSegmentsTool to prevent token overflow #114
base: main
Changes from all commits
```diff
@@ -45,10 +45,28 @@ def get_index_mapping(args: GetIndexMappingArgs) -> json:
 def search_index(args: SearchIndexArgs) -> json:
     """Search an index with pagination support.

     Args:
         args: SearchIndexArgs containing index, query, and optional pagination params

     Returns:
         json: Search results from OpenSearch
     """
     from .client import initialize_client

     client = initialize_client(args)
-    response = client.search(index=args.index, body=args.query)
+
+    # Ensure query is a dict for merging
+    query_body = args.query if isinstance(args.query, dict) else {}
+
+    # Apply pagination parameters (override any user-provided values)
+    # Cap size at a maximum of 100 to prevent token overflow
+    effective_size = min(args.size, 100) if args.size else 10
+    query_body['size'] = effective_size
+    query_body['from'] = args.from_ if args.from_ is not None else 0
+
+    response = client.search(index=args.index, body=query_body)
     return response
```
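The pagination rules in this hunk (default size 10, hard cap at 100, `from` defaulting to 0) can be exercised in isolation. A minimal sketch — the helper name `cap_pagination` is mine, not part of the PR:

```python
def cap_pagination(size, from_, max_size=100, default_size=10):
    """Mirror the PR's rules: default size 10, cap at max_size, from defaults to 0."""
    effective_size = min(size, max_size) if size else default_size
    effective_from = from_ if from_ is not None else 0
    return effective_size, effective_from

print(cap_pagination(None, None))  # (10, 0)
print(cap_pagination(500, 20))     # (100, 20)
```

Note that a `size` of 0 falls back to the default here, matching the truthiness check (`if args.size`) used in the diff.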
```diff
@@ -62,21 +80,26 @@ def get_shards(args: GetShardsArgs) -> json:
 def get_segments(args: GetSegmentsArgs) -> json:
     """Get information about Lucene segments in indices.

     Args:
-        args: GetSegmentsArgs containing optional index filter
+        args: GetSegmentsArgs containing optional index filter and limit

     Returns:
         json: Segment information for the specified indices or all indices
     """
     from .client import initialize_client

     client = initialize_client(args)

     # If index is provided, filter by that index
     index_param = args.index if args.index else None

     response = client.cat.segments(index=index_param, format='json')

+    # Apply limit to prevent token overflow
+    if args.limit and isinstance(response, list):
+        return response[:args.limit]
+
     return response
```

Collaborator (on lines +99 to +102): When we apply the max limit, perhaps adding information to the response message sent back to the MCP client indicating that the result has been truncated would help? The same thing could be applied to the SearchIndexTool response as well.
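The reviewer's suggestion could look roughly like the sketch below. The wrapper function and the response keys (`truncated`, `total_available`, `returned`) are assumptions for illustration, not code from the PR:

```python
def apply_limit_with_notice(segments, limit):
    """Truncate a segment list and flag the truncation for the MCP client."""
    if limit and isinstance(segments, list) and len(segments) > limit:
        return {
            'segments': segments[:limit],
            'truncated': True,
            'total_available': len(segments),
            'returned': limit,
        }
    return {'segments': segments, 'truncated': False}

result = apply_limit_with_notice([{'index': f'i{n}'} for n in range(5)], limit=2)
print(result['truncated'], result['returned'])  # True 2
```

Returning the untruncated total alongside the page lets the client decide whether to re-query with a larger `limit` or paginate.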
```diff
@@ -499,7 +499,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
     },
     'SearchIndexTool': {
         'display_name': 'SearchIndexTool',
-        'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch',
+        'description': 'Searches an index using a query written in query domain-specific language (DSL) in OpenSearch. Supports pagination with size (default: 10, max: 100) and from parameters to limit response size and prevent token overflow.',
         'input_schema': SearchIndexArgs.model_json_schema(),
         'function': search_index_tool,
         'args_model': SearchIndexArgs,
```

Collaborator: This new information about the parameters is sent to the LLM as part of the tool description. Maybe we can limit the tool description to just its function and not describe its parameters?
```diff
@@ -524,7 +524,7 @@ async def get_long_running_tasks_tool(args: GetLongRunningTasksArgs) -> list[dic
     },
     'GetSegmentsTool': {
         'display_name': 'GetSegmentsTool',
-        'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices.',
+        'description': 'Gets information about Lucene segments in indices, including memory usage, document counts, and segment sizes. Can be filtered by specific indices. Supports limit parameter (default: 1000) to prevent token overflow.',
         'input_schema': GetSegmentsArgs.model_json_schema(),
         'function': get_segments_tool,
         'args_model': GetSegmentsArgs,
```

Collaborator: Same concern as above.
Collaborator: Can we make the max limit variable configurable, either via a CLI parameter or a config file? We can default to a maximum of 100 if the variable is not provided.
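A rough sketch of that idea, with the suggested default of 100. The flag name, environment-variable name, and helper are hypothetical, not part of the PR:

```python
import argparse
import os

def resolve_max_size(cli_value=None, env_var='OPENSEARCH_MCP_MAX_SIZE', default=100):
    """Pick the max result size: CLI flag wins, then the env var, then the default."""
    if cli_value is not None:
        return cli_value
    env = os.environ.get(env_var)
    if env and env.isdigit():
        return int(env)
    return default

parser = argparse.ArgumentParser()
parser.add_argument('--max-size', type=int, default=None)
args = parser.parse_args([])            # no CLI flag given here
print(resolve_max_size(args.max_size))  # 100 unless the env var is set
```

With this precedence, an operator can raise the cap per-deployment without touching the tool code, and explicit CLI invocations still override everything.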