Skip to content

Commit 2362afb

Browse files
committed
feat(python): polishing knowledge apis
1 parent f43809d commit 2362afb

File tree

15 files changed

+241
-249
lines changed

15 files changed

+241
-249
lines changed

examples/004/images/main/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ async def main(node):
1313

1414
us_federal_code = StaticKnowledgeProvider("us_federal_code")
1515
for name, url in docs.items():
16-
await us_federal_code.add_document(name, url, content_type="text/markdown")
16+
await us_federal_code.add(name, url, content_type="text/markdown")
1717

1818
await Agent.start(
1919
node=node,

examples/005/images/main/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
async def main(node):
55
pioneer_docs = Memory("pioneer_ai_documents")
6-
await pioneer_docs.add_document(
6+
await pioneer_docs.add(
77
"Ownership in Pioneer.ai",
88
"http://localhost:5555/ownership.md",
99
content_type="text/markdown",

implementations/python/examples/13.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
async def main(node):
1010
restaurants = Memory("restaurants")
1111

12-
await restaurants.add_document(
13-
Document.inline(
12+
await restaurants.add(
13+
Document(
1414
"Tony's Pizzeria Menu",
1515
"""
1616
1. Margherita - $10
@@ -23,7 +23,7 @@ async def main(node):
2323
)
2424
)
2525

26-
await restaurants.add_document(
26+
await restaurants.add(
2727
Document.inline(
2828
"Diner Menu",
2929
"""

implementations/python/examples/14.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from ockam import Agent, Model, Node, Memory, Retrieval
2-
from ockam.knowledge.protocol import Document
1+
from ockam import Agent, Model, Node, Memory, Document, SEARCHABLE
32

43
"""
54
This example shows how a model can be enriched with knowledge coming from documents retrieved online.
@@ -20,13 +19,13 @@ async def main(node):
2019
]
2120

2221
for document in documents:
23-
await ockam_documentation.add_document(
24-
Document.url(
25-
document,
26-
f"{base_url}/{document}",
22+
await ockam_documentation.add(
23+
Document(
24+
name=document,
25+
url=f"{base_url}/{document}",
2726
content_type="text/markdown",
2827
),
29-
retrieval=Retrieval.SEARCHABLE_PIECES
28+
retrieval=SEARCHABLE
3029
)
3130

3231
agent = await Agent.start(

implementations/python/python/ockam/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .agents import Agent, AgentReference, HttpServer, Repl
22
from .clusters import Cluster, Zone
33
from .flows import Flow, FlowOperation, START, END
4+
from .knowledge.protocol import Document
45
from .nodes.message import FlowReference
56
from .history import ConversationHistory
67
from .logging import info, warning, error, debug, set_log_levels, get_logger
@@ -16,9 +17,13 @@
1617
Retrieval,
1718
)
1819
from .gather import gather
20+
from .knowledge.memory import Retrieval
1921

2022
from .ockam_in_rust_for_python import Mailbox, McpClient, McpServer
2123

24+
SEARCHABLE = Retrieval.SEARCHABLE
25+
WHOLE = Retrieval.WHOLE
26+
2227
__doc__ = ""
2328
__all__ = [
2429
# from .agents
@@ -49,7 +54,7 @@
4954
# from .tools
5055
"McpTool",
5156
"Tool",
52-
# from .memory
57+
# from .history
5358
"ConversationHistory",
5459
# from .models
5560
"Model",
@@ -70,9 +75,12 @@
7075
"DynamicPlanner",
7176
# from .squads
7277
"Squad",
73-
# from .knowledge
78+
# from .memory
7479
"Memory",
7580
"Retrieval",
81+
"Document",
82+
"SEARCHABLE",
83+
"WHOLE",
7684
# from .gather
7785
"gather",
7886
]

implementations/python/python/ockam/agents/agent.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from ..knowledge import Memory
99
from .conversation_response import ConversationResponse
10+
from ..knowledge.providers import ProviderSearchResultAggregator
1011
from ..nodes import RemoteNode, LocalNodeProtocol
1112
from ..planning import Planner
1213

@@ -142,7 +143,7 @@ async def _handle_planning_state(self):
142143
if next_step is None:
143144
break
144145
plan_completed = False
145-
self.contextual_knowledge = await self.agent.add_knowledge_search(next_step.content)
146+
self.contextual_knowledge = await self.agent.memory.search(next_step.content)
146147
if next_step.phase == Phase.PLANNING:
147148
await self.agent.remember(self.scope, self.conversation, next_step)
148149
if self.stream:
@@ -361,9 +362,9 @@ async def handle__conversation_snippet(
361362
if message.role == ConversationRole.USER:
362363
query = message.content
363364

364-
contextual_knowledge = None
365+
contextual_knowledge: ProviderSearchResultAggregator = ProviderSearchResultAggregator()
365366
if self.memory is not None:
366-
contextual_knowledge = await self.add_knowledge_search(query)
367+
contextual_knowledge = await self.memory.search(query)
367368

368369
# Create and initialize the state machine
369370
state_machine = AgentStateMachine(
@@ -394,7 +395,6 @@ async def add_knowledge_search(self, query: str) -> Optional[str]:
394395
if self.memory is None or not query or len(query) == 0:
395396
return None
396397

397-
await self.memory.add_query(query)
398398
return self.memory.render_text()
399399

400400
async def remember(self, scope: str, conversation: str, message: ConversationMessage):
@@ -410,11 +410,12 @@ async def complete_chat(
410410
) -> AsyncGenerator[Tuple[Optional[Exception], bool, AssistantMessage], None]:
411411
message_history = await self.message_history(scope, conversation)
412412

413-
if contextual_knowledge is not None:
413+
knowledge = contextual_knowledge.render()
414+
if knowledge is not None:
414415
message_history = [
415416
{
416417
"role": "system",
417-
"content": "The following knowledge could be useful to answer properly:\n" + contextual_knowledge,
418+
"content": "The following knowledge could be useful to answer properly:\n" + knowledge,
418419
}
419420
] + message_history
420421

implementations/python/python/ockam/knowledge/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .memory import Memory, Retrieval
22
from .providers import KnowledgeProviderAggregator, SearchableKnowledgeProvider, SimpleKnowledgeProvider
33
from .protocol import KnowledgeProvider
4-
from .search import SearchHit, SearchResults, TextPiece
4+
from .search import SearchHit, TextPiece
55
from .storage.in_memory import InMemory
66
from .storage.database import Database
77
from .storage import create_storage
@@ -18,7 +18,6 @@
1818
"KnowledgeProviderAggregator",
1919
"SearchableKnowledgeProvider",
2020
"SearchHit",
21-
"SearchResults",
2221
"TextPiece",
2322
"TextExtractor",
2423
"Chunker",
Lines changed: 31 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from enum import Enum
2-
from typing import Dict, List, Optional
2+
from typing import Dict, List, Optional, Any, Coroutine
33

4-
from .protocol import Document
5-
from .providers import KnowledgeProvider, SearchableKnowledgeProvider, SimpleKnowledgeProvider
6-
from .search import SearchHit, SearchResults
4+
from .protocol import Document, ProviderSearchResult
5+
from .providers import KnowledgeProvider, SearchableKnowledgeProvider, SimpleKnowledgeProvider, \
6+
ProviderSearchResultAggregator
77
from ..models import Model
88
from ..logging.logging import get_logging_config
99
import logging.config
@@ -24,93 +24,64 @@ class Retrieval(Enum):
2424
Search within the document and retrieve only relevant sections matching the query.
2525
Use this for large or structured documents where only specific parts are needed.
2626
"""
27-
ALWAYS_WHOLE = "always",
28-
SEARCHABLE_PIECES = "searchable",
29-
27+
WHOLE = "whole",
28+
SEARCHABLE = "searchable",
3029

3130
class Memory:
32-
namespace: str
33-
providers: List[KnowledgeProvider]
31+
scope: str
32+
knowledge_providers: List[KnowledgeProvider]
3433
whole_provider: SimpleKnowledgeProvider
3534
searchable_provider: SearchableKnowledgeProvider
36-
search_results: SearchResults
3735
max_knowledge_size: int
3836

3937
def __init__(
4038
self,
41-
namespace: str,
39+
scope: str,
4240
model: Model = Model.default_embedding_model(),
4341
max_knowledge_size: int = 4096,
44-
providers: Optional[List[KnowledgeProvider]] = None,
42+
knowledge_providers: Optional[List[KnowledgeProvider]] = None,
4543
):
4644
"""
4745
Initializes a Knowledge instance.
4846
49-
:param namespace: A string representing the unique namespace for the knowledge.
47+
:param scope: A string representing the unique scope of the knowledge.
5048
:param max_knowledge_size: Maximum size of knowledge in characters.
51-
:param providers: A list of KnowledgeProvider instances to be used for searching.
49+
:param knowledge_providers: A list of KnowledgeProvider instances to be used for searching.
5250
"""
53-
self.namespace = namespace
54-
self.whole_provider = SimpleKnowledgeProvider(namespace)
55-
self.searchable_provider = SearchableKnowledgeProvider(namespace, model)
56-
self.providers = [self.whole_provider, self.searchable_provider]
57-
if providers:
58-
self.providers.extend(providers)
59-
self.search_results = SearchResults()
51+
self.scope = scope
52+
self.whole_provider = SimpleKnowledgeProvider(scope)
53+
self.searchable_provider = SearchableKnowledgeProvider(scope, model)
54+
self.knowledge_providers = [self.whole_provider, self.searchable_provider]
55+
if knowledge_providers:
56+
self.knowledge_providers.extend(knowledge_providers)
6057
self.max_knowledge_size = max_knowledge_size
6158

62-
async def add_document(self, document: Document, retrieval: Retrieval = Retrieval.ALWAYS_WHOLE) -> None:
59+
async def add(self, document: Document, retrieval: Retrieval = Retrieval.WHOLE) -> None:
6360
"""
6461
Add a document to the knowledge base.
6562
6663
:param document: The document to add.
6764
:param retrieval: Retrieval strategy to use.
6865
:raises ValueError: If the retrieval type is unknown.
6966
"""
70-
if retrieval == Retrieval.ALWAYS_WHOLE:
71-
await self.whole_provider.add_document(document)
72-
elif retrieval == Retrieval.SEARCHABLE_PIECES:
73-
await self.searchable_provider.add_document(document)
67+
if retrieval == Retrieval.WHOLE:
68+
await self.whole_provider.add(document)
69+
elif retrieval == Retrieval.SEARCHABLE:
70+
await self.searchable_provider.add(document)
7471
else:
7572
raise ValueError(f"Unknown retrieval type: {retrieval}")
7673

77-
async def add_query(
74+
async def search(
7875
self, query: str,
79-
) -> None:
76+
) -> ProviderSearchResultAggregator:
8077
"""
8178
This method will search across all providers and store the results.
8279
8380
:param query: Search query string.
8481
"""
85-
hits: List[SearchHit] = []
86-
for provider in self.providers:
87-
provider_hits = await provider.search(query)
88-
hits.extend(provider_hits)
89-
self.search_results.add(hits)
90-
91-
def render_text(self) -> Optional[str]:
92-
"""
93-
Renders the text from the search results into a string.
94-
95-
:return: Formatted text from search results, or None when there are no results.
96-
"""
97-
initial_knowledge_size = None
98-
while True:
99-
view: Dict[str, List[str]] = self.search_results.view()
100-
if len(view) > 0:
101-
contextual_knowledge: str = ""
102-
for document_name, text_pieces in view.items():
103-
contextual_knowledge += f"Document name: {document_name}\n"
104-
for text_piece in text_pieces:
105-
contextual_knowledge += f"- {text_piece}\n"
106-
contextual_knowledge += "\n"
107-
108-
if len(contextual_knowledge) <= self.max_knowledge_size:
109-
if initial_knowledge_size is not None:
110-
logger.info(f"Knowledge size reduced from {initial_knowledge_size} to {len(contextual_knowledge)} characters.")
111-
return contextual_knowledge
112-
else:
113-
initial_knowledge_size = len(contextual_knowledge)
114-
self.search_results.reduce_size()
115-
else:
116-
return None
82+
results: List[ProviderSearchResult] = []
83+
for provider in self.knowledge_providers:
84+
results.append(
85+
await provider.search(query)
86+
)
87+
return ProviderSearchResultAggregator(results)

implementations/python/python/ockam/knowledge/protocol.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from typing import Protocol, List, Optional
2-
from .search import SearchHit
1+
from typing import Protocol, Optional
32
import secrets
43

54

@@ -31,9 +30,25 @@ def url(name: str, url: str, content_type: Optional[str], id: Optional[str] = No
3130
return Document(name, url=url, content_type=content_type, id=id)
3231

3332

33+
class ProviderSearchResult(Protocol):
34+
def render(self) -> Optional[str]:
35+
"""
36+
Renders the search result into a string format.
37+
38+
:return: A string representation of the search result.
39+
"""
40+
...
41+
42+
def reduce_size(self) -> None:
43+
"""
44+
Reduces the size of the search result.
45+
This method removes the oldest or least relevant results.
46+
"""
47+
...
48+
3449

3550
class KnowledgeProvider(Protocol):
36-
async def search(self, query: str) -> List[SearchHit]:
51+
async def search(self, query: str) -> ProviderSearchResult:
3752
"""
3853
Asynchronously searches for results that are most relevant to the provided query.
3954
It uses an embedding model to convert the query into an embedding vector for efficient
@@ -45,4 +60,3 @@ async def search(self, query: str) -> List[SearchHit]:
4560
:return: A list of search results that match the criteria.
4661
:rtype: list
4762
"""
48-

0 commit comments

Comments
 (0)