Ontos-AI · suguanYang · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/apps/api/alembic/versions/fae1b2c3d4e5_add_document_list_order_index.py b/apps/api/alembic/versions/fae1b2c3d4e5_add_document_list_order_index.py
@@ -0,0 +1,32 @@
+"""add document list order index
+
+Revision ID: fae1b2c3d4e5
+Revises: f9d0e1f2a3b4
+Create Date: 2026-06-30 07:15:00.000000
+"""
+
+from __future__ import annotations
+
+from typing import Sequence, Union
+
+from alembic import op
+
+
+# revision identifiers, used by Alembic.
+revision: str = "fae1b2c3d4e5"
+down_revision: Union[str, Sequence[str], None] = "f9d0e1f2a3b4"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the composite index on ``documents`` for the document list query.
+
+    The statement uses ``IF NOT EXISTS``, so running it against a database
+    that already has the index is a no-op.
+    """
+    # NOTE(review): the list query filters with ``status != 'archived'`` (an
+    # inequality) while ``status`` sits before ``updated_at`` in this index.
+    # Confirm with the target database's query plan that the index can still
+    # serve the ``updated_at DESC, document_id ASC`` ordering; if not, consider
+    # moving ``status`` behind the sort keys.
+    create_index_sql = """
+        CREATE INDEX IF NOT EXISTS idx_documents_user_namespace_status_updated
+        ON documents (user_id, namespace, status, updated_at DESC, document_id ASC)
+        """
+    op.execute(create_index_sql)
+
+
+def downgrade() -> None:
+    """Drop the index created by ``upgrade``; a missing index is tolerated."""
+    drop_index_sql = "DROP INDEX IF EXISTS idx_documents_user_namespace_status_updated"
+    op.execute(drop_index_sql)
diff --git a/apps/api/app/api/v1/routes/documents.py b/apps/api/app/api/v1/routes/documents.py
@@ -44,19 +44,20 @@ async def _archive_document_response(
 @router.get("")
 async def list_documents(
     namespace: str | None = Query(None, max_length=255),
+    page: int = Query(1, ge=1, description="Page number"),
+    page_size: int = Query(50, ge=1, le=200, description="Items per page"),
     current_user: CurrentUser = Depends(with_current_user),
     db: AsyncSession = Depends(get_db),
 ):
+    # Returns one page of the current user's documents for the normalized
+    # namespace. ``page`` is 1-based (ge=1) and ``page_size`` is limited to
+    # 1..200 by the Query validators, defaulting to page 1 with 50 items.
     effective_namespace = normalize_retrieval_namespace(namespace)
-    documents = await _document_service.list_documents(
+    response = await _document_service.list_documents(
         db,
         user_id=current_user.user_id,
         namespace=effective_namespace,
+        page=page,
+        page_size=page_size,
     )
-    return {
-        "namespace": effective_namespace,
-        "documents": documents,
-    }
+    # The service now builds the whole payload (namespace, documents and
+    # pagination), so its result is returned unchanged.
+    return response
 
 
 @router.get("/{document_id}")
@@ -85,6 +86,10 @@ async def list_document_chunks(
     page: int = Query(1, ge=1, description="Page number"),
     page_size: int = Query(50, ge=1, le=200, description="Items per page"),
     chunk_type: DocumentChunkType | None = Query(None, description="Chunk type filter"),
+    include_asset_urls: bool = Query(
+        False,
+        description="Generate 7-day asset URLs for image/table chunks when true",
+    ),
     current_user: CurrentUser = Depends(with_current_user),
     db: AsyncSession = Depends(get_db),
 ):
@@ -95,6 +100,7 @@ async def list_document_chunks(
         page=page,
         page_size=page_size,
         chunk_type=chunk_type,
+        include_asset_urls=include_asset_urls,
     )
     if response is None:
         raise NotFoundException(
@@ -109,6 +115,10 @@ async def list_document_chunks(
 async def get_document_chunk(
     document_id: str,
     document_chunk_id: str,
+    include_asset_urls: bool = Query(
+        False,
+        description="Generate 7-day asset URLs for image/table chunks when true",
+    ),
     current_user: CurrentUser = Depends(with_current_user),
     db: AsyncSession = Depends(get_db),
 ):
@@ -117,6 +127,7 @@ async def get_document_chunk(
         user_id=current_user.user_id,
         document_id=document_id,
         document_chunk_id=document_chunk_id,
+        include_asset_urls=include_asset_urls,
     )
     if response is None:
         raise NotFoundException(

diff --git a/apps/api/app/repositories/document_repository.py b/apps/api/app/repositories/document_repository.py
@@ -23,16 +23,35 @@ async def list_by_user_namespace(
         *,
         user_id: str,
         namespace: str,
+        limit: int,
+        offset: int,
     ) -> Sequence[Document]:
         result = await db.execute(
             select(Document)
             .where(Document.user_id == user_id)
             .where(Document.namespace == namespace)
             .where(Document.status != "archived")
-            .order_by(Document.updated_at.desc())
+            .order_by(Document.updated_at.desc(), Document.document_id.asc())
+            .limit(limit)
+            .offset(offset)
         )
         return result.scalars().all()
 
+    async def count_by_user_namespace(
+        self,
+        db: AsyncSession,
+        *,
+        user_id: str,
+        namespace: str,
+    ) -> int:
+        """Count a user's non-archived documents within a namespace.
+
+        Applies the same three filters as ``list_by_user_namespace`` (user,
+        namespace, status other than "archived") but without limit or offset.
+
+        Returns:
+            The count reported by the database, coerced to ``int``.
+        """
+        count_query = (
+            select(func.count(Document.document_id))
+            .where(Document.user_id == user_id)
+            .where(Document.namespace == namespace)
+            .where(Document.status != "archived")
+        )
+        outcome = await db.execute(count_query)
+        total = outcome.scalar_one()
+        return int(total)
+
     async def get_document(
         self,
         db: AsyncSession,

diff --git a/apps/api/app/services/documents/lifecycle_service.py b/apps/api/app/services/documents/lifecycle_service.py
@@ -15,12 +15,44 @@
     invalidate_retrieval_cache_namespaces,
 )
 from shared.services.retrieval.graph.service import DocumentGraphService, GraphScope
+from shared.services.storage.result_storage import ResultStorage, get_result_storage
+
+# Expiry passed as ``expires_in`` when generating chunk asset URLs:
+# 7 days expressed in seconds.
+_DOCUMENT_CHUNK_ASSET_URL_EXPIRES_SECONDS = 7 * 24 * 60 * 60
+# Chunk types for which an asset URL may be generated; any other type gets None.
+_MEDIA_CHUNK_TYPES = frozenset({"image", "table"})
 
 
 def _datetime_payload(value: datetime | None) -> str | None:
+    """Return ``value.isoformat()``, or None when no value is given."""
     return value.isoformat() if value else None
 
 
+def _document_chunk_asset_url(
+    *,
+    chunk_type: str,
+    job_id: str | None,
+    file_path: str | None,
+    include_asset_urls: bool,
+    result_storage: ResultStorage | None,
+) -> str | None:
+    """Build an expiring asset URL for a media chunk, or return None.
+
+    Args:
+        chunk_type: Chunk type; only members of ``_MEDIA_CHUNK_TYPES`` qualify.
+        job_id: Job identifier handed to the storage backend.
+        file_path: Chunk file path, handed to the storage backend as the
+            artifact reference.
+        include_asset_urls: Caller opt-in; when false no URL is generated.
+        result_storage: Storage object used to generate the URL.
+
+    Returns:
+        The generated URL, or None when the caller did not opt in, the chunk
+        type is not a media type, ``job_id``/``file_path``/``result_storage``
+        is missing, or URL generation raised.
+    """
+    # Guards run in the same order as a single short-circuiting ``or`` chain.
+    if not include_asset_urls:
+        return None
+    if chunk_type not in _MEDIA_CHUNK_TYPES:
+        return None
+    if not job_id or not file_path:
+        return None
+    if result_storage is None:
+        return None
+
+    try:
+        asset_url = result_storage.generate_artifact_url(
+            job_id=job_id,
+            artifact_ref=file_path,
+            expires_in=_DOCUMENT_CHUNK_ASSET_URL_EXPIRES_SECONDS,
+        )
+    except Exception as exc:
+        # Best effort: a failed URL must not fail the whole chunk response.
+        logger.warning(f"Failed to generate document chunk asset URL (ignored): {exc}")
+        return None
+    return asset_url
+
+
 def document_payload(document) -> dict[str, Any]:
     return {
         "document_id": document.document_id,
@@ -53,13 +85,31 @@ async def list_documents(
         *,
         user_id: str,
         namespace: str,
-    ) -> list[dict[str, Any]]:
+        page: int,
+        page_size: int,
+    ) -> dict[str, Any]:
+        total = await self._repository.count_by_user_namespace(
+            db,
+            user_id=user_id,
+            namespace=namespace,
+        )
         documents = await self._repository.list_by_user_namespace(
             db,
             user_id=user_id,
             namespace=namespace,
+            limit=page_size,
+            offset=(page - 1) * page_size,
         )
-        return [document_payload(document) for document in documents]
+        return {
+            "namespace": namespace,
+            "documents": [document_payload(document) for document in documents],
+            "pagination": {
+                "page": page,
+                "page_size": page_size,
+                "total": total,
+                "total_pages": math.ceil(total / page_size) if total else 0,
+            },
+        }
 
     async def list_document_chunks(
         self,
@@ -70,6 +120,7 @@ async def list_document_chunks(
         page: int,
         page_size: int,
         chunk_type: str | None,
+        include_asset_urls: bool,
     ) -> dict[str, Any] | None:
         document = await self._repository.get_document(
             db,
@@ -110,10 +161,14 @@ async def list_document_chunks(
             offset=(page - 1) * page_size,
             chunk_type=normalized_chunk_type,
         )
+        result_storage = get_result_storage() if include_asset_urls else None
         chunks = [
             self._chunk_payload(
                 chunk=chunk,
                 section=section,
+                job_id=job_result.job_id,
+                include_asset_urls=include_asset_urls,
+                result_storage=result_storage,
             )
             for chunk, section, job_result in rows
         ]
@@ -140,6 +195,7 @@ async def get_document_chunk(
         user_id: str,
         document_id: str,
         document_chunk_id: str,
+        include_asset_urls: bool,
     ) -> dict[str, Any] | None:
         document = await self._repository.get_document(
             db,
@@ -159,6 +215,7 @@ async def get_document_chunk(
             return None
 
         chunk, section, job_result = row
+        result_storage = get_result_storage() if include_asset_urls else None
         return {
             "document_id": document.document_id,
             "namespace": document.namespace,
@@ -167,6 +224,9 @@ async def get_document_chunk(
             "chunk": self._chunk_payload(
                 chunk=chunk,
                 section=section,
+                job_id=job_result.job_id,
+                include_asset_urls=include_asset_urls,
+                result_storage=result_storage,
             ),
         }
 
@@ -191,6 +251,9 @@ def _chunk_payload(
         *,
         chunk: DocumentChunk,
         section: DocumentSection | None,
+        job_id: str | None,
+        include_asset_urls: bool,
+        result_storage: ResultStorage | None,
     ) -> dict[str, Any]:
         chunk_type = _normalize_chunk_type(chunk.chunk_type)
         file_path = chunk.file_path
@@ -205,6 +268,13 @@ def _chunk_payload(
             "file_path": file_path,
             "sort_order": chunk.sort_order,
             "metadata": chunk.chunk_metadata,
+            "asset_url": _document_chunk_asset_url(
+                chunk_type=chunk_type,
+                job_id=job_id,
+                file_path=file_path,
+                include_asset_urls=include_asset_urls,
+                result_storage=result_storage,
+            ),
             "created_at": _datetime_payload(chunk.created_at),
         }