agno-agi
diff --git a/libs/agno/agno/knowledge/reader/reader_factory.py b/libs/agno/agno/knowledge/reader/reader_factory.py
Lines changed: 20 additions & 17 deletions
diff --git a/libs/agno/agno/media.py b/libs/agno/agno/media.py
Lines changed: 59 additions & 24 deletions
diff --git a/libs/agno/agno/os/routers/agents/router.py b/libs/agno/agno/os/routers/agents/router.py
Lines changed: 4 additions & 53 deletions
diff --git a/libs/agno/agno/os/routers/teams/router.py b/libs/agno/agno/os/routers/teams/router.py
Lines changed: 4 additions & 29 deletions
diff --git a/libs/agno/agno/os/utils.py b/libs/agno/agno/os/utils.py
Lines changed: 19 additions & 7 deletions
@@ -2,6 +2,7 @@
 from typing import Any, Callable, Dict, List, Optional
 
 from agno.knowledge.reader.base import Reader
+from agno.utils.common import MIME_TO_EXTENSION
 
 
 class ReaderFactory:
@@ -364,30 +365,32 @@ def create_reader(cls, reader_key: str, **kwargs) -> Reader:
 
     @classmethod
     def get_reader_for_extension(cls, extension: str) -> Reader:
-        """Get the appropriate reader for a file extension."""
-        # TODO: add docling for unique file extensions eg: images, audios, etc.
-        extension = extension.lower()
-
-        if extension in [".pdf", "application/pdf"]:
+        """Get the appropriate reader for a file extension or MIME type."""
+        # 1. Standardize the input: lower() and remove optional leading dot
+        ext = extension.lower().strip()
+        if ext.startswith("."):
+            ext = ext[1:]
+
+        # 2. Check if the input is a full MIME type and convert to short extension
+        if ext in MIME_TO_EXTENSION:
+            ext = MIME_TO_EXTENSION[ext]
+
+        # 3. Route to the specialized reader based on the normalized format
+        if ext == "pdf":
             return cls.create_reader("pdf")
-        elif extension in [".csv", "text/csv"]:
+        elif ext == "csv":
             return cls.create_reader("csv")
-        elif extension in [
-            ".xlsx",
-            ".xls",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-            "application/vnd.ms-excel",
-        ]:
+        elif ext in ["xlsx", "xls"]:
             return cls.create_reader("excel")
-        elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
+        elif ext in ["docx", "doc"]:
             return cls.create_reader("docx")
-        elif extension == ".pptx":
+        elif ext == "pptx":
             return cls.create_reader("pptx")
-        elif extension == ".json":
+        elif ext == "json":
             return cls.create_reader("json")
-        elif extension in [".md", ".markdown"]:
+        elif ext in ["md", "markdown"]:
             return cls.create_reader("markdown")
-        elif extension in [".txt", ".text"]:
+        elif ext in ["txt", "text"]:
             return cls.create_reader("text")
         else:
             # Default to text reader for unknown extensions
 
@@ -5,6 +5,7 @@
 
 from pydantic import BaseModel, field_validator, model_validator
 
+from agno.utils.common import MIME_TO_EXTENSION
 from agno.utils.log import log_error
 
 
@@ -52,6 +53,21 @@ def validate_and_normalize_content(cls, data: Any):
 
         return data
 
+    @field_validator("mime_type")
+    @classmethod
+    def validate_mime_type(cls, v):
+        """Lower-case mime_type (None passes through); raise ValueError if not in valid_mime_types()."""
+        if v is None:
+            return v
+        allowed = cls.valid_mime_types()
+        if v.lower() not in allowed:
+            raise ValueError(f"Invalid MIME type: {v}. Must be one of: {allowed}")
+        return v.lower()
+
+    @classmethod
+    def valid_mime_types(cls) -> List[str]:  # MIME_TO_EXTENSION keys that start with "image/"
+        return [mime for mime in MIME_TO_EXTENSION if mime.startswith("image/")]
+
     def get_content_bytes(self) -> Optional[bytes]:
         """Get image content as raw bytes, loading from URL/file if needed"""
         if self.content:
@@ -168,6 +184,21 @@ def validate_and_normalize_content(cls, data: Any):
 
         return data
 
+    @field_validator("mime_type")
+    @classmethod
+    def validate_mime_type(cls, v):
+        """Lower-case mime_type (None passes through); raise ValueError if not in valid_mime_types()."""
+        if v is None:
+            return v
+        allowed = cls.valid_mime_types()
+        if v.lower() not in allowed:
+            raise ValueError(f"Invalid MIME type: {v}. Must be one of: {allowed}")
+        return v.lower()
+
+    @classmethod
+    def valid_mime_types(cls) -> List[str]:  # MIME_TO_EXTENSION keys that start with "audio/"
+        return [mime for mime in MIME_TO_EXTENSION if mime.startswith("audio/")]
+
     def get_content_bytes(self) -> Optional[bytes]:
         """Get audio content as raw bytes"""
         if self.content:
@@ -300,6 +331,21 @@ def validate_and_normalize_content(cls, data: Any):
 
         return data
 
+    @field_validator("mime_type")
+    @classmethod
+    def validate_mime_type(cls, v):
+        """Lower-case mime_type (None passes through); raise ValueError if not in valid_mime_types()."""
+        if v is None:
+            return v
+        allowed = cls.valid_mime_types()
+        if v.lower() not in allowed:
+            raise ValueError(f"Invalid MIME type: {v}. Must be one of: {allowed}")
+        return v.lower()
+
+    @classmethod
+    def valid_mime_types(cls) -> List[str]:  # MIME_TO_EXTENSION keys that start with "video/"
+        return [mime for mime in MIME_TO_EXTENSION if mime.startswith("video/")]
+
     def get_content_bytes(self) -> Optional[bytes]:
         """Get video content as raw bytes"""
         if self.content:
@@ -345,7 +391,7 @@ def from_base64(
         format: Optional[str] = None,
         **kwargs,
     ) -> "Video":
-        """Create Image from base64 content"""
+        """Create Video from base64 content"""
         import base64
 
         try:
@@ -379,18 +425,18 @@ def to_dict(self, include_base64_content: bool = True) -> Dict[str, Any]:
 
 
 class File(BaseModel):
+    # Core content fields (at least one required)
     id: Optional[str] = None
     url: Optional[str] = None
     filepath: Optional[Union[Path, str]] = None
-    # Raw bytes content of a file
-    content: Optional[Any] = None
-    mime_type: Optional[str] = None
+    content: Optional[Any] = None  # Raw bytes content of a file
+    external: Optional[Any] = None  # External file object (e.g. GeminiFile)
 
+    # Metadata fields
+    mime_type: Optional[str] = None
     file_type: Optional[str] = None
     filename: Optional[str] = None
     size: Optional[int] = None
-    # External file object (e.g. GeminiFile, must be a valid object as expected by the model you are using)
-    external: Optional[Any] = None
     format: Optional[str] = None  # E.g. `pdf`, `txt`, `csv`, `xml`, etc.
     name: Optional[str] = None  # Name of the file, mandatory for AWS Bedrock document input
 
@@ -408,28 +454,17 @@ def check_at_least_one_source(cls, data):
     @classmethod
     def validate_mime_type(cls, v):
         """Validate that the mime_type is one of the allowed types."""
-        if v is not None and v not in cls.valid_mime_types():
-            raise ValueError(f"Invalid MIME type: {v}. Must be one of: {cls.valid_mime_types()}")
+        if v is not None:
+            v_lower = v.lower()
+            if v_lower not in cls.valid_mime_types():
+                raise ValueError(f"Invalid MIME type: {v}. Must be one of: {cls.valid_mime_types()}")
+            return v_lower
         return v
 
     @classmethod
     def valid_mime_types(cls) -> List[str]:
-        return [
-            "application/pdf",
-            "application/json",
-            "application/x-javascript",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            "text/javascript",
-            "application/x-python",
-            "text/x-python",
-            "text/plain",
-            "text/html",
-            "text/css",
-            "text/markdown",
-            "text/csv",
-            "text/xml",
-            "text/rtf",
-        ]
+        # All MIME_TO_EXTENSION keys are accepted, including any image/*, audio/* or video/* entries.
+        return list(MIME_TO_EXTENSION.keys())
 
     @classmethod
     def from_base64(
 
@@ -278,77 +278,28 @@ async def create_agent_run(
 
         if files:
             for file in files:
-                if file.content_type in [
-                    "image/png",
-                    "image/jpeg",
-                    "image/jpg",
-                    "image/gif",
-                    "image/webp",
-                    "image/bmp",
-                    "image/tiff",
-                    "image/tif",
-                    "image/avif",
-                    "image/heic",
-                    "image/heif",
-                ]:
+                if file.content_type in Image.valid_mime_types():
                     try:
                         base64_image = process_image(file)
                         base64_images.append(base64_image)
                     except Exception as e:
                         log_error(f"Error processing image {file.filename}: {e}")
                         continue
-                elif file.content_type in [
-                    "audio/wav",
-                    "audio/wave",
-                    "audio/mp3",
-                    "audio/mpeg",
-                    "audio/ogg",
-                    "audio/mp4",
-                    "audio/m4a",
-                    "audio/aac",
-                    "audio/flac",
-                ]:
+                elif file.content_type in Audio.valid_mime_types():
                     try:
                         audio = process_audio(file)
                         base64_audios.append(audio)
                     except Exception as e:
                         log_error(f"Error processing audio {file.filename} with content type {file.content_type}: {e}")
                         continue
-                elif file.content_type in [
-                    "video/x-flv",
-                    "video/quicktime",
-                    "video/mpeg",
-                    "video/mpegs",
-                    "video/mpgs",
-                    "video/mpg",
-                    "video/mpg",
-                    "video/mp4",
-                    "video/webm",
-                    "video/wmv",
-                    "video/3gpp",
-                ]:
+                elif file.content_type in Video.valid_mime_types():
                     try:
                         base64_video = process_video(file)
                         base64_videos.append(base64_video)
                     except Exception as e:
                         log_error(f"Error processing video {file.filename}: {e}")
                         continue
-                elif file.content_type in [
-                    "application/pdf",
-                    "application/json",
-                    "application/x-javascript",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                    "text/javascript",
-                    "application/x-python",
-                    "text/x-python",
-                    "text/plain",
-                    "text/html",
-                    "text/css",
-                    "text/markdown",
-                    "text/csv",
-                    "text/xml",
-                    "text/rtf",
-                ]:
+                elif file.content_type in FileMedia.valid_mime_types():
                     # Process document files
                     try:
                         input_file = process_document(file)
 
@@ -225,53 +225,28 @@ async def create_team_run(
 
         if files:
             for file in files:
-                if file.content_type in [
-                    "image/png",
-                    "image/jpeg",
-                    "image/jpg",
-                    "image/webp",
-                    "image/heic",
-                    "image/heif",
-                ]:
+                if file.content_type in Image.valid_mime_types():
                     try:
                         base64_image = process_image(file)
                         base64_images.append(base64_image)
                     except Exception as e:
                         logger.error(f"Error processing image {file.filename}: {e}")
                         continue
-                elif file.content_type in ["audio/wav", "audio/mp3", "audio/mpeg"]:
+                elif file.content_type in Audio.valid_mime_types():
                     try:
                         base64_audio = process_audio(file)
                         base64_audios.append(base64_audio)
                     except Exception as e:
                         logger.error(f"Error processing audio {file.filename}: {e}")
                         continue
-                elif file.content_type in [
-                    "video/x-flv",
-                    "video/quicktime",
-                    "video/mpeg",
-                    "video/mpegs",
-                    "video/mpgs",
-                    "video/mpg",
-                    "video/mpg",
-                    "video/mp4",
-                    "video/webm",
-                    "video/wmv",
-                    "video/3gpp",
-                ]:
+                elif file.content_type in Video.valid_mime_types():
                     try:
                         base64_video = process_video(file)
                         base64_videos.append(base64_video)
                     except Exception as e:
                         logger.error(f"Error processing video {file.filename}: {e}")
                         continue
-                elif file.content_type in [
-                    "application/pdf",
-                    "text/csv",
-                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-                    "text/plain",
-                    "application/json",
-                ]:
+                elif file.content_type in FileMedia.valid_mime_types():
                     document_file = process_document(file)
                     if document_file is not None:
                         document_files.append(document_file)
 
@@ -21,6 +21,7 @@
 from agno.run.workflow import WorkflowRunOutputEvent
 from agno.team import RemoteTeam, Team
 from agno.tools import Function, Toolkit
+from agno.utils.common import MIME_TO_EXTENSION
 from agno.utils.log import log_warning, logger
 from agno.workflow import RemoteWorkflow, Workflow
 
@@ -491,14 +492,25 @@ def process_document(file: UploadFile) -> Optional[FileMedia]:
 
 
 def extract_format(file: UploadFile) -> Optional[str]:
-    """Extract the File format from file name or content_type."""
-    # Get the format from the filename
-    if file.filename and "." in file.filename:
-        return file.filename.split(".")[-1].lower()
-
-    # Fallback to the file content_type
+    """Return the file's format from its filename suffix, else from its content_type (None if neither)."""
+    # Priority 1: text after the final "." of the filename ("a.tar.gz" -> "gz").
+    if file.filename:
+        _, dot, suffix = file.filename.rpartition(".")
+        ext = suffix.lower().strip()
+        if dot and ext:
+            return ext
+
+    # Priority 2: explicit MIME_TO_EXTENSION lookup on the normalized content type.
     if file.content_type:
-        return file.content_type.strip().split("/")[-1]
+        # Drop parameters such as "; charset=utf-8" before matching.
+        main_type = file.content_type.split(";")[0].strip().lower()
+        if main_type in MIME_TO_EXTENSION:
+            return MIME_TO_EXTENSION[main_type]
+
+        # Priority 3: MIME subtype ("image/png" -> "png"); an empty subtype yields None, not "".
+        _, slash, subtype = main_type.rpartition("/")
+        if slash and subtype:
+            return subtype
 
     return None