Merge branch 'enhance-prompts' into dev

Ahmath-Gadji · Ahmath-Gadji · commit 6ab614af55c0 · 2025-10-23T10:39:39.000Z
diff --git a/.hydra_config/config.yaml b/.hydra_config/config.yaml
@@ -85,8 +85,8 @@ prompts:
   multi_query: multi_query_pmpt_tmpl.txt
 
 loader:
-  image_captioning: true
-  save_markdown: false
+  image_captioning: ${oc.decode:${oc.env:IMAGE_CAPTIONING, true}}
+  save_markdown: ${oc.decode:${oc.env:SAVE_MARKDOWN, true}}
   audio_model: ${oc.env:WHISPER_MODEL, base} # tiny, base, small, medium, large-v1, large-v2, large-v3
   mimetypes:
     text/plain: .txt
diff --git a/openrag/components/indexer/loaders/base.py b/openrag/components/indexer/loaders/base.py
@@ -20,13 +20,13 @@ class BaseLoader(ABC):
     def __init__(self, **kwargs) -> None:
         self.page_sep = "[PAGE_SEP]"
         self.config = kwargs.get("config")
-        vlm_config = self.config.vlm
+        settings: dict = dict(self.config.vlm)
         model_settings = {
             "temperature": 0.2,
             "max_retries": 3,
             "timeout": 60,
+            "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
         }
-        settings: dict = vlm_config
         settings.update(model_settings)
 
         self.image_captioning = self.config.loader.get("image_captioning", False)
diff --git a/openrag/components/indexer/vectordb/vectordb.py b/openrag/components/indexer/vectordb/vectordb.py
@@ -112,6 +112,22 @@ async def get_chunk_by_id(self, chunk_id: str):
 
 MAX_LENGTH = 65_535
 
+analyzer_params = {
+    "tokenizer": "standard",
+    "filter": [
+        {
+            "type": "stop",  # Specifies the filter type as stop
+            "stop_words": [
+                "<image_description>",
+                "</image_description>",
+                "[Image Placeholder]",
+                "_english_",
+                "_french_",
+            ],  # Defines custom stop words and includes the English and French stop word list
+        }
+    ],
+}
+
 
 @ray.remote
 class MilvusDB(BaseVectorDB):
@@ -247,6 +263,7 @@ def _create_schema(self):
             enable_analyzer=True,
             enable_match=True,
             max_length=MAX_LENGTH,
+            analyzer_params=analyzer_params,
         )
 
         schema.add_field(
diff --git a/prompts/example1/image_captioning_tmpl.txt b/prompts/example1/image_captioning_tmpl.txt
@@ -1,24 +1,19 @@
-You are an expert tasked with describing images. 
-Your mission is to produce a factual, structured and complete description in markdown format in the same language as that used in the image.
+You are an expert in image description.
 
-1. Non-informative content such as logos, icons, emojis, isolated objects, photos:
-  * Provide a short description without going into details related to colors, themes, etc.
-    * Example descriptions: `Nike logo`, `Photo of a cat`, `Folder icon`, etc.
+## Rules
+- Use the language shown in the image.
+- Do not describe colors, shapes, or styles unless they are part of the data.
+- Never add, infer, or translate information.
 
-2. Text Content
-  - Transcribe the text in its entirety, without adding additional information.
+## 1. Simple / Non-informative images
+- If there is no text or the image is non-informative → output “[Image Placeholder]”.
+- If it contains text → transcribe it exactly, using Markdown if structured (headings, lists, emphasis).
+- If it’s a logo with text → output only the textual content
 
-3. Tables
-  - Use correct Markdown table syntax to reproduce tables from the content.  
-  - Ensure alignment, readability, and preservation of all data while keeping the table structure intact.
+## 2. Informative content: tables, charts, diagrams, interfaces, or structured documents.
+  1. Transcribe all numerical and categorical values and **format them** as a **structured Markdown table**.
+  2. Provide a concise description of what the graph represents.
+  3. Highlight trends, patterns, and key conclusions.
 
-4. For advanced visuals: charts, graphs, diagrams, schemas, or other data visualizations
-  a. Firstly do a markdown conversion:
-    - convert visible data as markdown tables whenever possible: numbers should be included accurately.
-    - Include the figure’s title if present.
-
-  b. Secondly do a figure interpretation in the same language as the document’s:
-    - Provide a brief description of the visual’s content, context, and purpose.
-    - Interpret the figure and mention any visible trends, patterns, or key insights (include numbers) and using the legends.
-
-The output should be in the same language as the content of the image
+## Output
+The output must remain factual, concise, and strictly limited to what is visible in the image.