-
Notifications
You must be signed in to change notification settings - Fork 291
/
Copy pathserve_vector_store.py
67 lines (53 loc) · 1.82 KB
/
serve_vector_store.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""Document indexing.
Building a vector store fast.
Adapted from open_source_LLM_search_engine:
https://github.com/ray-project/langchain-ray/
You can run this from the terminal in the search_engine directory like this:
> PYTHONPATH=../ python serve_vector_store.py
"""
import time
import requests
from config import set_environment
from fastapi import FastAPI
from langchain_community.vectorstores import FAISS
from ray import serve
from search_engine.utils import INDEX_PATH, get_embeddings
# Populate API-key environment variables (project-local helper from config).
set_environment()
# FastAPI app instance; Ray Serve wraps it via @serve.ingress on the class below.
app = FastAPI()
@serve.deployment()
@serve.ingress(app)
class VectorSearchDeployment:
    """Ray Serve deployment exposing MMR search over a local FAISS index."""

    def __init__(self):
        """Load the FAISS vector store from disk once per replica."""
        start = time.time()
        self.embeddings = get_embeddings()
        # NOTE(review): newer langchain_community releases require
        # allow_dangerous_deserialization=True here — confirm pinned version.
        self.db = FAISS.load_local(INDEX_PATH, self.embeddings)
        print(f"Loading database took {time.time() - start} seconds.")

    @app.get("/search")
    def search(self, query: str) -> str:
        """Return MMR-selected chunks formatted with their source URLs.

        Args:
            query: Free-text search query.

        Returns:
            One "From http://<source>" header, the chunk text, and a
            "====" separator per result (k=1, so at most one chunk).
        """
        results = self.db.max_marginal_relevance_search(query, k=1, fetch_k=10)
        # "".join instead of repeated += — avoids quadratic string building.
        return "".join(
            f"From http://{chunk.metadata['source']}\n\n"
            f"{chunk.page_content}\n====\n\n"
            for chunk in results
        )
# bind() readies the deployment DAG without executing it; keeping this at
# module level lets `serve run module:deployment` discover it.
deployment = VectorSearchDeployment.bind()

if __name__ == "__main__":
    # Start the Serve application only when run as a script — previously
    # serve.run() sat at module level and started the server on import.
    serve.run(deployment)
    # Smoke-test the /search endpoint with a sample query.
    print(
        requests.get(
            "http://localhost:8000/search",
            params={
                "query": "What are the different components of Ray"
                " and how can they help with large language models (LLMs)?"
            },
        ).json()
    )
    input("Press Enter to shut down the server...")