Skip to content

Commit

Permalink
Merge pull request #3 from ittia-research/dev
Browse files (browse the repository at this point in the history)
add env RAG_CHUNK_SIZES
  • Loading branch information
etwk authored Aug 4, 2024
2 parents b0ddfd5 + 29bfbd0 commit efc1809
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 2 deletions.
1 change: 1 addition & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ RAG_MODEL_DEPLOY=local
RERANK_MODEL_NAME=BAAI/bge-reranker-v2-m3
RERANK_BASE_URL=http://xinference:9997/v1
SEARCH_BASE_URL=https://s.jina.ai
RAG_CHUNK_SIZES=[4096, 1024, 256]
2 changes: 1 addition & 1 deletion src/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def get_contexts(statement, keywords, text):
document = Document(text=text)
index = build_automerging_index(
[document],
chunk_sizes=[8192, 2048, 512],
chunk_sizes=settings.RAG_CHUNK_SIZES,
) # todo: will it better to use retriever directly?

query_engine = get_automerging_query_engine(index, similarity_top_k=16)
Expand Down
10 changes: 9 additions & 1 deletion src/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
import os, ast

class Settings:
def __init__(self):
Expand All @@ -15,4 +15,12 @@ def __init__(self):
# set RAG model deploy mode
self.RAG_MODEL_DEPLOY = os.environ.get("RAG_MODEL_DEPLOY") or "local"

# set RAG chunk sizes
self.RAG_CHUNK_SIZES = [4096, 1024, 256]
_chunk_sizes = os.environ.get("RAG_CHUNK_SIZES")
try:
self.RAG_CHUNK_SIZES = ast.literal_eval(_chunk_sizes)
except:
pass

settings = Settings()
3 changes: 3 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ async def get_homepage():
[Usage] {settings.PROJECT_HOSTING_BASE_URL}/YOUR_FACT_CHECK_QUERY
[Source] https://github.com/ittia-research/check
{md}
"""
return md
Expand All @@ -201,6 +203,7 @@ async def get_stack():
"LLM model": settings.LLM_MODEL_NAME,
"Embedding model": settings.EMBEDDING_MODEL_NAME,
"Rerank model": settings.RERANK_MODEL_NAME,
"RAG chunk sizes": settings.RAG_CHUNK_SIZES,
"RAG deploy mode": settings.RAG_MODEL_DEPLOY,
}
return stack
Expand Down

0 comments on commit efc1809

Please sign in to comment.