diff --git a/.github/workflows/build-and-push-images.yml b/.github/workflows/build-and-push-images.yml index e535efc67..06c8d7f88 100644 --- a/.github/workflows/build-and-push-images.yml +++ b/.github/workflows/build-and-push-images.yml @@ -9,7 +9,7 @@ concurrency: on: push: - branches: [release-docker-build] + branches: [release-docs] paths-ignore: - '**.md' - 'images/**/*' @@ -77,93 +77,16 @@ jobs: type=ref,event=tag type=ref,event=pr - - name: "Build and push multi-platform Docker image: genai-stack/pull-model" + - name: "Build and push multi-platform Docker image: genai-stack/release-api" uses: docker/build-push-action@v5 with: context: . - file: ./pull_model.Dockerfile + file: ./release_docs_api.Dockerfile push: true platforms: linux/amd64,linux/arm64 # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-pull-model:latest + tags: releaseai/genai-stack-release-api:latest labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max - - name: "Build and push multi-platform Docker image: genai-stack/loader" - uses: docker/build-push-action@v5 - with: - context: . - file: ./loader.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-loader:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: "Build and push multi-platform Docker image: genai-stack/custom-loader" - uses: docker/build-push-action@v5 - with: - context: . - file: ./custom_loader.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-custom-loader:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: "Build and push multi-platform Docker image: genai-stack/bot" - uses: docker/build-push-action@v5 - with: - context: . - file: ./bot.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-bot:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: "Build and push multi-platform Docker image: genai-stack/pdf-bot" - uses: docker/build-push-action@v5 - with: - context: . - file: ./pdf_bot.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-pdf-bot:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: "Build and push multi-platform Docker image: genai-stack/api" - uses: docker/build-push-action@v5 - with: - context: . - file: ./api.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-api:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max - - - name: "Build and push multi-platform Docker image: genai-stack/front-end" - uses: docker/build-push-action@v5 - with: - context: . 
- file: ./front-end.Dockerfile - push: true - platforms: linux/amd64,linux/arm64 - # tags: ${{ steps.meta.outputs.tags }} - tags: releaseai/genai-stack-front-end:latest - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max diff --git a/.release/application_template.yaml b/.release/application_template.yaml index 3687c98fd..f52677d30 100644 --- a/.release/application_template.yaml +++ b/.release/application_template.yaml @@ -1,15 +1,10 @@ --- -auto_deploy: true -context: release-ry6clz -domain: ai-playground.releaseapp.io +execution_type: server repo_name: awesome-release/genai-stack hostnames: -- api: api-${env_id}.${domain} -- bot: bot-${env_id}.${domain} -- front-end: front-end-${env_id}.${domain} -- loader: loader-${env_id}.${domain} -- pdf-bot: pdf-bot-${env_id}.${domain} -- ollama: ollama-${env_id}.${domain} +- api: api-release-docs-rag-${env_id}.${domain} +- chroma: chroma-release-docs-rag-${env_id}.${domain} +- front-end: front-end-release-docs-rag-${env_id}.${domain} environment_templates: - name: ephemeral - name: permanent @@ -21,85 +16,35 @@ resources: limits: 1Gi requests: 100Mi replicas: 1 -ingress: - proxy_body_size: 30m - proxy_buffer_size: 64k - proxy_buffering: true - proxy_buffers_number: 4 - proxy_max_temp_file_size: 1024m - proxy_read_timeout: '180' - proxy_send_timeout: '180' shared_volumes: - name: models - size: 40Gi + size: 60Gi type: persistent parameters: - name: llm type: string description: Can be any Ollama model tag, or gpt-4 or gpt-3.5 or claudev2 - default: llama2 -- name: embedding_model - type: string - description: Can be sentence_transformer, openai, aws, ollama or google-genai-embedding-001 - default: sentence_transformer + default: llama3:8b + optional: true services: -- name: ollama - image: ollama/ollama - command: - - "/bin/ollama" - args: - - serve - envs: - - key: LLM - value: "${parameters.llm}" - - key: EMBEDDING_MODEL - value: "${parameters.embedding_model}" - memory: - limits: 64Gi - requests: 16Gi - cpu: - limits: 128 - requests: 12 - ports: - - type: node_port - target_port: '11434' - port: '11434' - loadbalancer: false - node_selector: - - key: nvidia.com/gpu - value: 'true' - - key: beta.kubernetes.io/instance-type - value: g5.12xlarge - volumes: - - name: shmem - type: shmem - size: 16Gi - mount_path: "/dev/shm" - - claim: models - mount_path: "/models" - name: api - image: awesome-release/genai-stack/api - build: - context: "." - dockerfile: api.Dockerfile - has_repo: true + image: awesome-release/genai-stack/release-api volumes: [] command: - uvicorn - - api:app + - release_docs_api:app - "--host" - 0.0.0.0 - "--port" - '8504' - envs: - - key: LLM - value: "${parameters.llm}" - - key: EMBEDDING_MODEL - value: "${parameters.embedding_model}" + depends_on: + - chroma readiness_probe: - http_get: - path: "/" - port: 8504 + exec: + command: + - curl + - "-f" + - http://localhost:8504/ period_seconds: 5 timeout_seconds: 3 failure_threshold: 5 @@ -109,152 +54,138 @@ services: target_port: '8504' port: '8504' loadbalancer: false -- name: bot - image: awesome-release/genai-stack/bot - build: - context: "." 
- dockerfile: bot.Dockerfile - has_repo: true - volumes: [] - command: - - streamlit - - run - - bot.py - - "--server.port=8501" - - "--server.address=0.0.0.0" envs: + - key: CHROMA_HOST + value: chroma + - key: CHROMA_PORT + value: 8000 - key: LLM value: "${parameters.llm}" - key: EMBEDDING_MODEL - value: "${parameters.embedding_model}" - ports: - - type: node_port - target_port: '8501' - port: '8501' - loadbalancer: false -- name: database - image: neo4j:5.11 + value: sentence_transformer + - key: OLLAMA_BASE_URL + value: http://ollama:11434 + - key: LANGCHAIN_API_KEY + value: "${LANGCHAIN_API_KEY}" + - key: LANGCHAIN_PROJECT + value: "${LANGCHAIN_PROJECT}" + - key: LANGCHAIN_ENDPOINT + value: https://api.smith.langchain.com + - key: LANGCHAIN_TRACING_V2 + value: false +- name: chroma + image: chromadb/chroma has_repo: false - volumes: [] + args: + - "--workers" + - '1' + - "--host" + - 0.0.0.0 + - "--port" + - '8000' + - "--proxy-headers" + - "--log-config" + - chromadb/log_config.yml + - "--timeout-keep-alive" + - '30' readiness_probe: - http_get: - path: "/" - port: 7474 - period_seconds: 15 - timeout_seconds: 30 - failure_threshold: 10 + exec: + command: + - curl + - "-f" + - http://localhost:8000/api/v1/heartbeat + period_seconds: 30 + timeout_seconds: 10 + failure_threshold: 3 initial_delay_seconds: 0 ports: - type: node_port - target_port: '7474' - port: '7474' - loadbalancer: false - - type: node_port - target_port: '7687' - port: '7687' + target_port: '8000' + port: '8000' loadbalancer: false - name: front-end - static: true - build_base: front-end - build_output_directory: dist/ - build_package_install_command: npm install - build_command: npm run build -- name: loader - image: awesome-release/genai-stack/loader + image: awesome-release/genai-stack/front-end build: context: "." - dockerfile: loader.Dockerfile + dockerfile: front-end.Dockerfile has_repo: true - volumes: [] command: - - streamlit + - npm - run - - loader.py - - "--server.port=8502" - - "--server.address=0.0.0.0" + - dev + depends_on: + - api ports: - type: node_port - target_port: '8502' - port: '8502' - loadbalancer: false - - type: node_port - target_port: '8080' - port: '8081' + target_port: '8505' + port: '8505' loadbalancer: false -- name: pdf-bot - image: awesome-release/genai-stack/pdf_bot - build: - context: "." - dockerfile: pdf_bot.Dockerfile - has_repo: true - command: - - streamlit - - run - - pdf_bot.py - - "--server.port=8503" - - "--server.address=0.0.0.0" envs: - - key: LLM - value: "${parameters.llm}" - - key: EMBEDDING_MODEL - value: "${parameters.embedding_model}" + - key: VITE_API_BASE_URL + value: "${API_INGRESS_URL}" +- name: ollama + image: ollama/ollama:latest + has_repo: false + memory: + limits: 64Gi + requests: 4Gi + cpu: + limits: 128 + requests: 2 + command: + - ollama + args: + - serve ports: - type: node_port - target_port: '8503' - port: '8503' + target_port: '11434' + port: '11434' loadbalancer: false + node_selector: + - key: nvidia.com/gpu + value: 'true' + volumes: + - name: shmem + type: shmem + size: 16Gi + mount_path: "/dev/shm" + - claim: models + mount_path: "/root/.ollama" +jobs: - name: pull-model - image: awesome-release/genai-stack/pull-model - build: - context: "." 
- dockerfile: pull_model.Dockerfile - has_repo: true + image: releaseai/genai-stack-pull-model command: - bb - "-f" - pull_model.clj -jobs: -- name: pull-model - from_services: pull-model envs: + - key: OLLAMA_BASE_URL + value: http://ollama:11434 - key: LLM value: "${parameters.llm}" - - key: EMBEDDING_MODEL - value: "${parameters.embedding_model}" workflows: - name: setup parallelize: - - step: services-parallel - wait_for_finish: false - tasks: - - services.front-end - step: services-0 tasks: - - services.database + - services.chroma - services.ollama - step: services-1 tasks: + - services.api - jobs.pull-model - step: services-2 tasks: - - services.api - - services.bot - - services.loader - - services.pdf-bot + - services.front-end - name: patch parallelize: - - step: services-parallel - wait_for_finish: false - tasks: - - services.front-end - - step: services-1 + - step: services-0 tasks: - services.api - - services.bot - - services.loader - - services.pdf-bot - name: teardown parallelize: - step: remove-environment tasks: - release.remove_environment + + diff --git a/chains.py b/chains.py index 9ad10f406..068b43be1 100644 --- a/chains.py +++ b/chains.py @@ -1,3 +1,4 @@ +import os from langchain_openai import OpenAIEmbeddings from langchain_community.embeddings import OllamaEmbeddings @@ -48,9 +49,15 @@ def load_embedding_model(embedding_model_name: str, logger=BaseLogger(), config= dimension = 768 logger.info("Embedding: Using Google Generative AI Embeddings") else: - embeddings = SentenceTransformerEmbeddings( - model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model" - ) + if os.path.exists("/embedding_model"): + embeddings = SentenceTransformerEmbeddings( + model_name="all-MiniLM-L6-v2", cache_folder="/embedding_model" + ) + else: + embeddings = SentenceTransformerEmbeddings( + model_name="all-MiniLM-L6-v2", cache_folder="./embedding_model" + ) + dimension = 384 logger.info("Embedding: Using SentenceTransformer") return embeddings, dimension diff --git a/release-docker-compose.yml b/release-docker-compose.yml new file mode 100644 index 000000000..82d6deed1 --- /dev/null +++ b/release-docker-compose.yml @@ -0,0 +1,118 @@ +services: + + llm: &llm + image: ollama/ollama:latest + profiles: ["linux"] + networks: + - net + + llm-gpu: + <<: *llm + profiles: ["linux-gpu"] + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + chroma: + image: chromadb/chroma + volumes: + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + # Read more about deployments: https://docs.trychroma.com/deployment + - chroma-data:/chroma/chroma + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" + environment: + - IS_PERSISTENT=TRUE + - CHROMA_SERVER_AUTHN_PROVIDER=${CHROMA_SERVER_AUTHN_PROVIDER} + - CHROMA_SERVER_AUTHN_CREDENTIALS_FILE=${CHROMA_SERVER_AUTHN_CREDENTIALS_FILE} + - CHROMA_SERVER_AUTHN_CREDENTIALS=${CHROMA_SERVER_AUTHN_CREDENTIALS} + - CHROMA_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_AUTH_TOKEN_TRANSPORT_HEADER} + - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} + - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} + - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} + - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} + - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} + - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} + restart: unless-stopped # 
possible values are: "no", always", "on-failure", "unless-stopped" + ports: + - "8000:8000" + healthcheck: + # Adjust below to match your container port + test: [ "CMD", "curl", "-f", "http://localhost:8000/api/v1/heartbeat" ] + interval: 30s + timeout: 10s + retries: 3 + networks: + - net + + api: + build: + context: . + dockerfile: release_docs_api.Dockerfile + volumes: + - $PWD/embedding_model:/embedding_model + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - GOOGLE_API_KEY=${GOOGLE_API_KEY} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL-http://host.docker.internal:11434} + - LLM=${LLM-llama2} + - EMBEDDING_MODEL=${EMBEDDING_MODEL-sentence_transformer} + - LANGCHAIN_ENDPOINT=${LANGCHAIN_ENDPOINT-"https://api.smith.langchain.com"} + - LANGCHAIN_TRACING_V2=${LANGCHAIN_TRACING_V2-false} + - LANGCHAIN_PROJECT=${LANGCHAIN_PROJECT} + - LANGCHAIN_API_KEY=${LANGCHAIN_API_KEY} + networks: + - net + depends_on: + chroma: + condition: service_healthy +# pull-model: +# condition: service_completed_successfully + x-develop: + watch: + - action: rebuild + path: . + ignore: + - loader.py + - bot.py + - pdf_bot.py + - front-end/ + ports: + - 8504:8504 + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 http://localhost:8504/ || exit 1"] + interval: 5s + timeout: 3s + retries: 5 + + front-end: + build: + context: . + dockerfile: front-end.Dockerfile + x-develop: + watch: + - action: sync + path: ./front-end + target: /app + ignore: + - ./front-end/node_modules/ + - action: rebuild + path: ./front-end/package.json + depends_on: + api: + condition: service_healthy + networks: + - net + ports: + - 8505:8505 + +volumes: + chroma-data: + driver: local + +networks: + net: diff --git a/release-docs-gitbook-ingest.py b/release-docs-gitbook-ingest.py new file mode 100755 index 000000000..39f4281d5 --- /dev/null +++ b/release-docs-gitbook-ingest.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +import os + +import chromadb +from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings + +from dotenv import load_dotenv +from utils import BaseLogger + +import glob +from typing import List +from multiprocessing import Pool +from tqdm import tqdm + +from langchain_community.document_loaders import ( + GitbookLoader, +) + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.vectorstores import Chroma +from langchain.docstore.document import Document + +from chains import ( + load_embedding_model, + load_llm, +) + + +load_dotenv(".env") + +chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs") +chroma_host = os.getenv("CHROMA_HOST", "localhost") +chroma_port = int(os.getenv("CHROMA_PORT", 8000)) +ollama_base_url = os.getenv("OLLAMA_BASE_URL") +embedding_model_name = os.getenv("EMBEDDING_MODEL") +llm_name = os.getenv("LLM") + +embeddings, dimension = load_embedding_model( + embedding_model_name, + config={"ollama_base_url": ollama_base_url}, + logger=BaseLogger(), +) + + +# Load environment variables +gitbook_url = os.environ.get('GITBOOK_URL', 'https://docs.release.com') +chunk_size = 500 +chunk_overlap = 50 + + +# Initialize Chroma client +chroma_client = chromadb.HttpClient( + host=chroma_host, + port=chroma_port, + ssl=False, + headers=None, + settings=Settings(), + tenant=DEFAULT_TENANT, + database=DEFAULT_DATABASE, +) + +# create vector database if it doesn't exist +chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"}) + + +def process_documents(ignored_files: List[str] = []) -> List[Document]: + """ + Load documents and 
split in chunks + """ + print(f"Loading documents from {gitbook_url}") + + loader = GitbookLoader(gitbook_url, load_all_paths=True) + documents = loader.load() + + if not documents: + print("No new documents to load") + exit(0) + print(f"Loaded {len(documents)} new documents from {gitbook_url}") + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) + texts = text_splitter.split_documents(documents) + print(f"Split into {len(texts)} chunks of text (max. {chunk_size} tokens each)") + return texts + +def does_vectorstore_exist() -> bool: + """ + Checks if vectorstore exists + """ + chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"}) + return True + +def main(): + + does_vectorstore_exist() + + # Update and store locally vectorstore + print(f"Appending to existing vectorstore") + db = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings) + collection = db.get() + + texts = process_documents([metadata['source'] for metadata in collection['metadatas']]) + print(f"Creating embeddings. May take some minutes...") + db.add_documents(texts) + + db = None + + print(f"Ingestion complete! You can now query your documents") + + +if __name__ == "__main__": + main() diff --git a/release-docs-ingest.py b/release-docs-ingest.py new file mode 100755 index 000000000..60129316a --- /dev/null +++ b/release-docs-ingest.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +import os + +import chromadb +from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings + +from dotenv import load_dotenv +from utils import BaseLogger + +import glob +from typing import List +from multiprocessing import Pool +from tqdm import tqdm + +from langchain_community.document_loaders import ( + CSVLoader, + EverNoteLoader, + PyMuPDFLoader, + TextLoader, + UnstructuredEmailLoader, + UnstructuredEPubLoader, + UnstructuredHTMLLoader, + UnstructuredMarkdownLoader, + UnstructuredODTLoader, + UnstructuredPowerPointLoader, + UnstructuredWordDocumentLoader, +) + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.vectorstores import Chroma +from langchain.docstore.document import Document + +from chains import ( + load_embedding_model, + load_llm, +) + + +load_dotenv(".env") + +chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs") +chroma_host = os.getenv("CHROMA_HOST", "localhost") +chroma_port = int(os.getenv("CHROMA_PORT", 8000)) +ollama_base_url = os.getenv("OLLAMA_BASE_URL") +embedding_model_name = os.getenv("EMBEDDING_MODEL") +llm_name = os.getenv("LLM") + +embeddings, dimension = load_embedding_model( + embedding_model_name, + config={"ollama_base_url": ollama_base_url}, + logger=BaseLogger(), +) + + +# Load environment variables +source_directory = os.environ.get('SOURCE_DIRECTORY', 'documents') +chunk_size = 500 +chunk_overlap = 50 + + +# Initialize Chroma client +chroma_client = chromadb.HttpClient( + host=chroma_host, + port=chroma_port, + ssl=False, + headers=None, + settings=Settings(), + tenant=DEFAULT_TENANT, + database=DEFAULT_DATABASE, +) + +# create vector database if it doesn't exist +chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"}) + + + +# Custom document loaders +class MyElmLoader(UnstructuredEmailLoader): + """Wrapper to fallback to text/plain when default does not work""" + + def load(self) -> List[Document]: + """Wrapper adding fallback for elm without html""" + try: + try: + doc = UnstructuredEmailLoader.load(self) 
+ except ValueError as e: + if 'text/html content not found in email' in str(e): + # Try plain text + self.unstructured_kwargs["content_source"]="text/plain" + doc = UnstructuredEmailLoader.load(self) + else: + raise + except Exception as e: + # Add file_path to exception message + raise type(e)(f"{self.file_path}: {e}") from e + + return doc + + +# Map file extensions to document loaders and their arguments +LOADER_MAPPING = { + ".csv": (CSVLoader, {}), + # ".docx": (Docx2txtLoader, {}), + ".doc": (UnstructuredWordDocumentLoader, {}), + ".docx": (UnstructuredWordDocumentLoader, {}), + ".enex": (EverNoteLoader, {}), + ".eml": (MyElmLoader, {}), + ".epub": (UnstructuredEPubLoader, {}), + ".html": (UnstructuredHTMLLoader, {}), + # ".md": (UnstructuredMarkdownLoader, { "mode": "elements", "encoding": "utf8" }), + ".md": (TextLoader, {"encoding": "utf8"}), + ".odt": (UnstructuredODTLoader, {}), + ".pdf": (PyMuPDFLoader, {}), + ".ppt": (UnstructuredPowerPointLoader, {}), + ".pptx": (UnstructuredPowerPointLoader, {}), + ".txt": (TextLoader, {"encoding": "utf8"}), + # Add more mappings for other file extensions and loaders as needed +} + + +def load_single_document(file_path: str) -> List[Document]: + if os.path.getsize(file_path) != 0: + filename, ext = os.path.splitext(file_path) + if ext in LOADER_MAPPING: + loader_class, loader_args = LOADER_MAPPING[ext] + try: + loader = loader_class(file_path, **loader_args) + if loader: + return loader.load() + except: + print(f"Corrupted file {file_path}. Ignoring it.") + else: + print(f"Unsupported file {file_path}. Ignoring it.") + else: + print(f"Empty file {file_path}. Ignoring it.") + + +def load_documents(source_dir: str, ignored_files: List[str] = []) -> List[Document]: + """ + Loads all documents from the source documents directory, ignoring specified files + """ + all_files = [] + for ext in LOADER_MAPPING: + all_files.extend( + glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True) + ) + filtered_files = [file_path for file_path in all_files if file_path not in ignored_files] + + with Pool(processes=os.cpu_count()) as pool: + results = [] + with tqdm(total=len(filtered_files), desc='Loading new documents', ncols=80) as pbar: + for i, docs in enumerate(pool.imap_unordered(load_single_document, filtered_files)): + if docs: + results.extend(docs) + pbar.update() + + return results + +def process_documents(ignored_files: List[str] = []) -> List[Document]: + """ + Load documents and split in chunks + """ + print(f"Loading documents from {source_directory}") + documents = load_documents(source_directory, ignored_files) + if not documents: + print("No new documents to load") + exit(0) + print(f"Loaded {len(documents)} new documents from {source_directory}") + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) + texts = text_splitter.split_documents(documents) + print(f"Split into {len(texts)} chunks of text (max. 
{chunk_size} tokens each)") + return texts + +def does_vectorstore_exist() -> bool: + """ + Checks if vectorstore exists + """ + chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"}) + return True + +def main(): + + does_vectorstore_exist() + + # Update and store locally vectorstore + print(f"Appending to existing vectorstore") + db = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings) + collection = db.get() + + texts = process_documents([metadata['source'] for metadata in collection['metadatas']]) + print(f"Creating embeddings. May take some minutes...") + db.add_documents(texts) + + db = None + + print(f"Ingestion complete! You can now query your documents") + + +if __name__ == "__main__": + main() diff --git a/release_docs_api.Dockerfile b/release_docs_api.Dockerfile new file mode 100644 index 000000000..e126b4027 --- /dev/null +++ b/release_docs_api.Dockerfile @@ -0,0 +1,21 @@ +FROM langchain/langchain + +WORKDIR /app + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . + +RUN pip install --upgrade -r requirements.txt + +COPY release_docs_api.py . +COPY utils.py . +COPY chains.py . + +HEALTHCHECK CMD curl --fail http://localhost:8504 + +ENTRYPOINT [ "uvicorn", "release_docs_api:app", "--host", "0.0.0.0", "--port", "8504" ] diff --git a/release_docs_api.py b/release_docs_api.py new file mode 100644 index 000000000..2013d21a4 --- /dev/null +++ b/release_docs_api.py @@ -0,0 +1,197 @@ +import os + +import chromadb +from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings +from langchain.vectorstores import Chroma + +from dotenv import load_dotenv +from utils import BaseLogger +from chains import ( + load_embedding_model, + load_llm, + configure_llm_only_chain, + RetrievalQAWithSourcesChain +) + +from langchain.chains.qa_with_sources import load_qa_with_sources_chain + +from langchain.prompts import ( + ChatPromptTemplate, + HumanMessagePromptTemplate, + SystemMessagePromptTemplate +) + + +from fastapi import FastAPI, Depends +from pydantic import BaseModel +from langchain.callbacks.base import BaseCallbackHandler +from threading import Thread +from queue import Queue, Empty +from collections.abc import Generator +from sse_starlette.sse import EventSourceResponse +from fastapi.middleware.cors import CORSMiddleware +import json + +load_dotenv(".env") + +chroma_collection = os.getenv("CHROMA_COLLECTION", "release-docs") +chroma_host = os.getenv("CHROMA_HOST", "localhost") +chroma_port = int(os.getenv("CHROMA_PORT", 8000)) +ollama_base_url = os.getenv("OLLAMA_BASE_URL") +embedding_model_name = os.getenv("EMBEDDING_MODEL") +llm_name = os.getenv("LLM") + +embeddings, dimension = load_embedding_model( + embedding_model_name, + config={"ollama_base_url": ollama_base_url}, + logger=BaseLogger(), +) + +# Initialize Chroma client +chroma_client = chromadb.HttpClient( + host=chroma_host, + port=chroma_port, + ssl=False, + headers=None, + settings=Settings(), + tenant=DEFAULT_TENANT, + database=DEFAULT_DATABASE, +) + +# create vector database if it doesn't exist +chroma_client.get_or_create_collection(chroma_collection, metadata={"key": "value"}) + +llm = load_llm( + llm_name, logger=BaseLogger(), config={"ollama_base_url": ollama_base_url} +) + +llm_chain = configure_llm_only_chain(llm) + +# PROMPT TEMPLATE +print("the dog is really raw!") +general_system_template = """ +---- +{summaries} +---- +""" 
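+# {summaries} is the slot that load_qa_with_sources_chain (chain_type="stuff")
+# fills with the document chunks returned by the Chroma retriever; the user's
+# question is supplied by the human message template below.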
+general_user_template = "Question:```{question}```" +messages = [ + SystemMessagePromptTemplate.from_template(general_system_template), + HumanMessagePromptTemplate.from_template(general_user_template), +] +qa_prompt = ChatPromptTemplate.from_messages(messages) + +qa_chain = load_qa_with_sources_chain( + llm, + chain_type="stuff", + prompt=qa_prompt, +) + + +langchainChroma = Chroma(client=chroma_client, collection_name=chroma_collection, embedding_function=embeddings) + +rag_chain = RetrievalQAWithSourcesChain( + combine_documents_chain=qa_chain, + retriever=langchainChroma.as_retriever(search_kwargs={"k": 2}), + reduce_k_below_max_tokens=False, + max_tokens_limit=3375, +) + + +class QueueCallback(BaseCallbackHandler): + """Callback handler for streaming LLM responses to a queue.""" + + def __init__(self, q): + self.q = q + + def on_llm_new_token(self, token: str, **kwargs) -> None: + self.q.put(token) + + def on_llm_end(self, *args, **kwargs) -> None: + return self.q.empty() + + +def stream(cb, q) -> Generator: + job_done = object() + + def task(): + x = cb() + q.put(job_done) + + t = Thread(target=task) + t.start() + + content = "" + + # Get each new token from the queue and yield for our generator + while True: + try: + next_token = q.get(True, timeout=1) + if next_token is job_done: + break + content += next_token + yield next_token, content + except Empty: + continue + + +app = FastAPI() +origins = ["*"] + +app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + + +@app.get("/") +async def root(): + return {"message": "Hello World"} + + +class Question(BaseModel): + text: str + rag: bool = False + + +class BaseTicket(BaseModel): + text: str + + +@app.get("/query-stream") +def qstream(question: Question = Depends()): + output_function = llm_chain + if question.rag: + output_function = rag_chain + + q = Queue() + + def cb(): + output_function( + {"question": question.text, "chat_history": []}, + callbacks=[QueueCallback(q)], + ) + + def generate(): + yield json.dumps({"init": True, "model": llm_name}) + for token, _ in stream(cb, q): + yield json.dumps({"token": token}) + + return EventSourceResponse(generate(), media_type="text/event-stream") + + +@app.get("/query") +async def ask(question: Question = Depends()): + output_function = llm_chain + if question.rag: + output_function = rag_chain + result = output_function( + {"question": question.text, "chat_history": []}, callbacks=[] + ) + + return {"result": result["answer"], "model": llm_name} + + diff --git a/release_docs_gitbook_ingest.Dockerfile b/release_docs_gitbook_ingest.Dockerfile new file mode 100644 index 000000000..fa7b240e5 --- /dev/null +++ b/release_docs_gitbook_ingest.Dockerfile @@ -0,0 +1,20 @@ +FROM langchain/langchain + +WORKDIR /app + +RUN apt-get update && apt-get install -y \ + build-essential \ + curl \ + software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . + +RUN pip install --upgrade -r requirements.txt + +COPY release-docs-gitbook-ingest.py . +COPY utils.py . +COPY chains.py . +COPY images ./images + +ENTRYPOINT ["python", "release-docs-gitbook-ingest.py"] diff --git a/requirements.txt b/requirements.txt index ad0f6f905..a62b27e50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,3 +17,4 @@ boto3 langchain-openai langchain-community langchain-google-genai +chromadb==0.5.0
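A minimal smoke test for the new API, as a sketch rather than part of the patch: it assumes the stack was started with docker compose -f release-docker-compose.yml up, that one of the ingest scripts (release-docs-ingest.py or release-docs-gitbook-ingest.py) has already populated the release-docs collection in Chroma, and that the api service is reachable on localhost:8504 as published in the compose file; the question text is only an example.

import requests

API_BASE = "http://localhost:8504"  # port published by the api service in release-docker-compose.yml

# Plain LLM answer, no retrieval (rag defaults to false in the Question model)
print(requests.get(f"{API_BASE}/query", params={"text": "What is Release?"}).json())

# RAG answer grounded in the ingested release-docs Chroma collection
print(
    requests.get(
        f"{API_BASE}/query",
        params={"text": "How do I create an ephemeral environment?", "rag": "true"},
    ).json()
)
# Each response has the shape {"result": "<answer>", "model": "<LLM name>"}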