Commit
Merge pull request #2 from ittia-research/dev
add settings, add status endpoint, update homepage to include tech stack
etwk authored Aug 4, 2024
2 parents c962a87 + 286ed29 commit b0ddfd5
Showing 7 changed files with 78 additions and 35 deletions.
6 changes: 4 additions & 2 deletions .env
@@ -1,8 +1,10 @@
-HOSTING_CHECK_BASE_URL=http://127.0.0.1:8000
-LLM_LOCAL_BASE_URL=http://xinference:9997/v1
+EMBEDDING_MODEL_NAME=jinaai/jina-embeddings-v2-base-en
 LLM_MODEL_NAME=google/gemma-2-27b-it
 OLLAMA_BASE_URL=http://ollama:11434
 OPENAI_API_KEY=sk-proj-aaaaaaaaaaaaaaaaa
 OPENAI_BASE_URL=http://localhost:8000/v1
+PROJECT_HOSTING_BASE_URL=http://127.0.0.1:8000
 RAG_MODEL_DEPLOY=local
+RERANK_MODEL_NAME=BAAI/bge-reranker-v2-m3
+RERANK_BASE_URL=http://xinference:9997/v1
 SEARCH_BASE_URL=https://s.jina.ai
7 changes: 3 additions & 4 deletions README.md
@@ -2,6 +2,9 @@ True, false, or just opinions? Maybe not binary, but a percentage.
 
 Fact-checking tools to combat disinformation.
 
+## Get Started
+Online demo: https://check.ittia.net
+
 ## Design
 Input something.
 
@@ -28,10 +31,6 @@ Verdits:
 - uncheckable: can't check without more background
 - unavailable: service unavailable
 
-## Get Started
-Online demo: https://check.ittia.net
-* Due to limited GPU resources, availbility of this demo are limited.
-
 ## Support
 Please contact if you can provide resources for this project:
 - AI API access
19 changes: 9 additions & 10 deletions src/index.py
@@ -19,26 +19,25 @@
 
 Settings.llm = MockLLM() # retrieve only, do not use LLM for synthesize
 
+from settings import settings
+
 import llama_index.postprocessor.jinaai_rerank.base as jinaai_rerank # todo: shall we lock package version?
-jinaai_rerank.API_URL = os.environ.get("LLM_LOCAL_BASE_URL") + "/rerank" # switch to on-premise
+jinaai_rerank.API_URL = settings.RERANK_BASE_URL + "/rerank" # switch to on-premise
 
 # todo: high lantency between client and the ollama embedding server will slow down embedding a lot
 from llama_index.embeddings.ollama import OllamaEmbedding
 
-# set RAG model deploy mode
-RAG_MODEL_DEPLOY = os.environ.get("RAG_MODEL_DEPLOY") or "local"
-
 def build_automerging_index(
     documents,
     chunk_sizes=None,
 ):
     chunk_sizes = chunk_sizes or [2048, 512, 128]
 
-    if RAG_MODEL_DEPLOY == "local":
-        embed_model="local:jinaai/jina-embeddings-v2-base-en"
+    if settings.RAG_MODEL_DEPLOY == "local":
+        embed_model="local:" + settings.EMBEDDING_MODEL_NAME
     else:
         embed_model = OllamaEmbedding(
-            model_name="jina/jina-embeddings-v2-base-en",
+            model_name=settings.EMBEDDING_MODEL_NAME,
             base_url=os.environ.get("OLLAMA_BASE_URL"), # todo: any other configs here?
         )
 
@@ -66,12 +65,12 @@ def get_automerging_query_engine(
         base_retriever, automerging_index.storage_context, verbose=True
     )
 
-    if RAG_MODEL_DEPLOY == "local":
+    if settings.RAG_MODEL_DEPLOY == "local":
         rerank = SentenceTransformerRerank(
-            top_n=rerank_top_n, model="BAAI/bge-reranker-v2-m3",
+            top_n=rerank_top_n, model=settings.RERANK_MODEL_NAME,
         ) # todo: add support `trust_remote_code=True`
     else:
-        rerank = jinaai_rerank.JinaRerank(api_key='', top_n=rerank_top_n, model="jina-reranker-v2")
+        rerank = jinaai_rerank.JinaRerank(api_key='', top_n=rerank_top_n, model=settings.RERANK_MODEL_NAME)
 
     auto_merging_engine = RetrieverQueryEngine.from_args(
         retriever, node_postprocessors=[rerank]
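
A usage sketch for the two helpers this file changes. Only rerank_top_n is visible in the diff; the data directory, the positional index argument, and calling .query() on the returned engine are assumptions:

# sketch only, not confirmed by the diff
from llama_index.core import SimpleDirectoryReader
from index import build_automerging_index, get_automerging_query_engine

documents = SimpleDirectoryReader("./data").load_data()  # hypothetical data directory
automerging_index = build_automerging_index(documents)   # 2048/512/128-token chunk hierarchy
engine = get_automerging_query_engine(automerging_index, rerank_top_n=3)
print(engine.query("claim to verify"))                   # rerank backend chosen by settings.RAG_MODEL_DEPLOY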
8 changes: 3 additions & 5 deletions src/llm.py
@@ -1,24 +1,22 @@
-import os
 from openai import OpenAI
 import logging
 
 import utils
+from settings import settings
 
 """
 About models:
 - Gemma 2 does not support system rule
 """
 
-LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME") or "google/gemma-2-27b-it"
-
 llm_client = OpenAI(
-    base_url=os.environ.get("OPENAI_BASE_URL"),
+    base_url=settings.OPENAI_BASE_URL,
     api_key="token",
 )
 
 def get_llm_reply(prompt, temperature=0):
     completion = llm_client.chat.completions.create(
-        model=LLM_MODEL_NAME,
+        model=settings.LLM_MODEL_NAME,
         messages=[
             {"role": "user", "content": prompt}
         ],
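
A minimal call sketch; the function tail is truncated above, so the exact return value (assumed here to be printable completion text) is not confirmed by the diff:

from llm import get_llm_reply

# Gemma 2 has no system role, so the whole instruction goes in the user message
reply = get_llm_reply("Is this claim true or false: water boils at 100 C at sea level?")
print(reply)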
7 changes: 6 additions & 1 deletion src/main.py
@@ -69,12 +69,17 @@ async def robots():
 @app.get("/health")
 async def health():
     return {"status": "ok"}
 
+@app.get("/status")
+async def status():
+    _status = await utils.get_status()
+    return _status
+
 @app.get("/{path:path}", response_class=PlainTextResponse)
 async def catch_all(path: str):
     try:
         if not path:
-            return utils.get_homepage()
+            return await utils.get_homepage()
         if not utils.check_input(path):
             return HTMLResponse(status_code=404, content="Invalid request") # filter brower background requests
         result = await fact_check(path)
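
A quick smoke test for the new endpoint. The base URL is an assumption (the old HOSTING_CHECK_BASE_URL default); aiohttp is already a dependency in src/utils.py:

import asyncio
import aiohttp

async def check_status():
    # GET /status returns {"stack": {...}} per utils.get_status() below
    async with aiohttp.ClientSession() as session:
        async with session.get("http://127.0.0.1:8000/status") as resp:
            print(resp.status)
            print(await resp.json())

asyncio.run(check_status())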
18 changes: 18 additions & 0 deletions src/settings.py
@@ -0,0 +1,18 @@
+import os
+
+class Settings:
+    def __init__(self):
+        # set model names
+        self.LLM_MODEL_NAME = os.environ.get("LLM_MODEL_NAME") or "google/gemma-2-27b-it"
+        self.EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME") or "jinaai/jina-embeddings-v2-base-en"
+        self.RERANK_MODEL_NAME = os.environ.get("RERANK_MODEL_NAME") or "BAAI/bge-reranker-v2-m3"
+
+        self.OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL") or "https://api.openai.com/v1"
+        self.RERANK_BASE_URL = os.environ.get("RERANK_BASE_URL") or "http://xinference:9997/v1"
+        self.PROJECT_HOSTING_BASE_URL = os.environ.get("PROJECT_HOSTING_BASE_URL") or "https://check.ittia.net"
+        self.SEARCH_BASE_URL = os.environ.get("SEARCH_BASE_URL") or "https://s.jina.ai"
+
+        # set RAG model deploy mode
+        self.RAG_MODEL_DEPLOY = os.environ.get("RAG_MODEL_DEPLOY") or "local"
+
+settings = Settings()
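
Note the `or` fallback: unlike os.environ.get(key, default), it also replaces variables that are set but empty. A sketch of the singleton in use:

from settings import settings

# one shared instance; env vars are read once, at first import
print(settings.LLM_MODEL_NAME)    # "google/gemma-2-27b-it" unless LLM_MODEL_NAME is set
print(settings.RAG_MODEL_DEPLOY)  # "local" by default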
48 changes: 35 additions & 13 deletions src/utils.py
@@ -1,11 +1,9 @@
-import re, json, ast, os
+import re, json, ast
 import aiohttp
 import itertools
 import logging
 
-logger = logging.getLogger(__name__)
-
-SEARCH_BASE_URL = os.environ.get("SEARCH_BASE_URL")
+from settings import settings
 
 def llm2list(text):
     list_obj = []
@@ -16,7 +14,7 @@ def llm2list(text):
         if match:
             list_obj = ast.literal_eval(match.group())
     except Exception as e:
-        logger.warning(f"Failed convert LLM response to list: {e}")
+        logging.warning(f"Failed convert LLM response to list: {e}")
         pass
     return list_obj
 
@@ -28,7 +26,7 @@ def llm2json(text):
         if match:
             json_object = json.loads(match.group())
     except Exception as e:
-        logger.warning(f"Failed convert LLM response to JSON: {e}")
+        logging.warning(f"Failed convert LLM response to JSON: {e}")
         pass
     return json_object
 
@@ -39,7 +37,7 @@ async def search(keywords):
    Todo:
    - Enhance response clear.
    """
-    constructed_url = SEARCH_BASE_URL + '/' + keywords
+    constructed_url = settings.SEARCH_BASE_URL + '/' + keywords
 
    headers = {
        "Accept": "application/json"
@@ -55,7 +53,7 @@
            text = "\n\n".join([doc['content'] for doc in response_data['data']])
            result = clear_md_links(text)
    except Exception as e:
-        logging.error(f"Search '{keywords}' failed: {e}")
+        logging.error(f"Search '{keywords}' failed: {e}")
        result = ''
 
    return result
@@ -181,11 +179,35 @@ def check_input(input):
 
     return True
 
-def get_homepage():
-    md = f"""(preview only)
+async def get_homepage():
+    # get tech stack
+    stack = await get_stack()
+    md = f"## Tech stack:\n"
+    lines = [md]
+    lines.extend([f"{key}: {value}" for key, value in stack.items()])
+    md = "\n".join(lines)
+
+    md = f"""Fact-check API
 
-Fact-check API
+[Usage] {settings.PROJECT_HOSTING_BASE_URL}/YOUR_FACT_CHECK_QUERY
 
-[Usage] {os.environ.get("HOSTING_CHECK_BASE_URL")}/YOUR_FACT_CHECK_QUERY
+{md}
 """
-    return md
+    return md
+
+async def get_stack():
+    # current tech stack
+    stack = {
+        "LLM model": settings.LLM_MODEL_NAME,
+        "Embedding model": settings.EMBEDDING_MODEL_NAME,
+        "Rerank model": settings.RERANK_MODEL_NAME,
+        "RAG deploy mode": settings.RAG_MODEL_DEPLOY,
+    }
+    return stack
+
+async def get_status():
+    stack = await get_stack()
+    status = {
+        "stack": stack
+    }
+    return status
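
Tracing the new get_homepage() with the default settings above, the homepage should render roughly as:

Fact-check API

[Usage] https://check.ittia.net/YOUR_FACT_CHECK_QUERY

## Tech stack:

LLM model: google/gemma-2-27b-it
Embedding model: jinaai/jina-embeddings-v2-base-en
Rerank model: BAAI/bge-reranker-v2-m3
RAG deploy mode: local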
