-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbedrock_kb.py
66 lines (53 loc) · 1.58 KB
/
bedrock_kb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import logging
import boto3
import log
import urllib
# Knowledge base to query, read from the environment at import time
# (None when KNOWLEDGE_BASE_ID is not set).
knowledge_base_id = os.getenv("KNOWLEDGE_BASE_ID")
logging.warning(f"using knowledge base id {knowledge_base_id}")

# Module-level client, created once at import and used by get_relevant_docs.
kb = boto3.client('bedrock-agent-runtime')
def get_relevant_docs(query, top_k):
    """Query the knowledge base and return its retrieval results.

    Args:
        query: Text sent as the retrieval query.
        top_k: Value passed as ``numberOfResults`` in the vector search
            configuration.

    Returns:
        The ``retrievalResults`` entry of the ``kb.retrieve`` response.
    """
    search_config = {"vectorSearchConfiguration": {"numberOfResults": top_k}}
    response = kb.retrieve(
        knowledgeBaseId=knowledge_base_id,
        retrievalConfiguration=search_config,
        retrievalQuery={"text": query},
    )
    results = response["retrievalResults"]
    logging.info(f"kb response found {len(results)} results")
    # Full response goes to the `log` module imported at the top of the file
    # (presumably a project-local logging helper -- confirm).
    log.debug(response)
    return results
def format_sources(sources):
    """Format retrieved source documents into a de-duplicated list.

    Each item of ``sources`` must provide ``source["location"]["type"]`` and
    ``source["score"]``. For ``"S3"`` locations the name is the path of
    ``source["location"]["s3Location"]["uri"]`` with leading slashes removed
    (i.e. the URI without its bucket part). Every other location type gets an
    empty name.

    Only the first source seen for a given name is kept; later sources with
    the same name are dropped together with their score. NOTE: because all
    non-S3 sources share the empty name, at most one of them is returned.

    Args:
        sources: Iterable of retrieval result dicts as described above.

    Returns:
        A list of dicts in input order, for example::

            [
                {
                    "name": "MyDocument.pdf",
                    "url": "https://document-server/MyDocument.pdf",
                    "score": 0.8525
                }
            ]

        ``url`` is currently always the empty string (presigned URLs are
        not generated yet).

    Raises:
        KeyError: If a source lacks one of the keys read above.
    """
    # Imported here on purpose: a bare ``import urllib`` does not guarantee
    # that the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import urlparse

    result = []
    seen_names = set()  # names already emitted, for O(1) duplicate checks
    for source in sources:
        location = source["location"]
        key = ""
        if location["type"] == "S3":
            # Drop the scheme and bucket; keep only the object key.
            key = urlparse(location["s3Location"]["uri"]).path.lstrip("/")
        # TODO: get presigned url
        url = ""
        if key in seen_names:
            continue
        seen_names.add(key)
        result.append({
            "name": key,
            "url": url,
            "score": source["score"],
        })
    return result