From 7dcbf2fe6d41f17c0dee2a6c441b8358f31f5913 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 8 Oct 2024 23:04:14 +0700
Subject: [PATCH 01/32] tmp

---
 .../engines/python/agent/tools/artifact.py    |  18 ++-
 .../engines/python/agent/tools/interpreter.py |   2 +-
 .../components/routers/python/sandbox.py      |  25 +++-
 .../streaming/fastapi/app/api/routers/chat.py |   8 +-
 .../fastapi/app/api/routers/models.py         | 135 ++++++++++++++----
 .../fastapi/app/api/routers/upload.py         |  16 ++-
 .../app/components/ui/chat/hooks/use-file.ts  |  41 ++----
 .../nextjs/app/components/ui/chat/index.ts    |  10 +-
 .../components/ui/chat/widgets/Artifact.tsx   |   1 +
 9 files changed, 189 insertions(+), 67 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/artifact.py b/templates/components/engines/python/agent/tools/artifact.py
index 4c877b2fd..70abb96d4 100644
--- a/templates/components/engines/python/agent/tools/artifact.py
+++ b/templates/components/engines/python/agent/tools/artifact.py
@@ -66,11 +66,17 @@ class CodeGeneratorTool:
     def __init__(self):
         pass
 
-    def artifact(self, query: str, old_code: Optional[str] = None) -> Dict:
+    def artifact(
+        self,
+        query: str,
+        sandbox_files: Optional[List[str]] = None,
+        old_code: Optional[str] = None,
+    ) -> Dict:
         """Generate a code artifact based on the input.
 
         Args:
             query (str): The description of the application you want to build.
+            sandbox_files (Optional[List[str]], optional): The list of sandbox file paths. Defaults to None.
             old_code (Optional[str], optional): The existing code to be modified. Defaults to None.
 
         Returns:
@@ -81,16 +87,20 @@ def artifact(self, query: str, old_code: Optional[str] = None) -> Dict:
             user_message = f"{query}\n\nThe existing code is: \n```\n{old_code}\n```"
         else:
             user_message = query
+        if sandbox_files:
+            user_message += f"\n\nThe provided files are: \n{str(sandbox_files)}"
 
         messages: List[ChatMessage] = [
             ChatMessage(role="system", content=CODE_GENERATION_PROMPT),
             ChatMessage(role="user", content=user_message),
         ]
         try:
-            sllm = Settings.llm.as_structured_llm(output_cls=CodeArtifact)  # type: ignore
+            sllm = Settings.llm.as_structured_llm(output_cls=CodeArtifact)
             response = sllm.chat(messages)
-            data: CodeArtifact = response.raw
-            return data.model_dump()
+            data: CodeArtifact = response.raw.model_dump()
+            if sandbox_files:
+                data["files"] = sandbox_files
+            return data
         except Exception as e:
             logger.error(f"Failed to generate artifact: {str(e)}")
             raise e
diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index 0f4c10b95..5bbadbd75 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -45,7 +45,7 @@ def __init__(self, api_key: str = None):
         self.interpreter = CodeInterpreter(api_key=api_key)
 
     def __del__(self):
-        self.interpreter.close()
+        self.interpreter.kill()
 
     def get_output_path(self, filename: str) -> str:
         # if output directory doesn't exist, create it
diff --git a/templates/components/routers/python/sandbox.py b/templates/components/routers/python/sandbox.py
index c5a2a3670..2498a5fc8 100644
--- a/templates/components/routers/python/sandbox.py
+++ b/templates/components/routers/python/sandbox.py
@@ -54,12 +54,19 @@ def to_response(self):
         }
 
 
+class FileUpload(BaseModel):
+    id: str
+    name: str
+
+
 @sandbox_router.post("")
 async def create_sandbox(request: Request):
     request_data = await request.json()
+    artifact_data = request_data["artifact"]
+    sandbox_files = artifact_data["files"]
 
     try:
-        artifact = CodeArtifact(**request_data["artifact"])
+        artifact = CodeArtifact(**artifact_data)
     except Exception:
         logger.error(f"Could not create artifact from request data: {request_data}")
         return HTTPException(
@@ -94,6 +101,9 @@ async def create_sandbox(request: Request):
                 f"Installed dependencies: {', '.join(artifact.additional_dependencies)} in sandbox {sbx}"
             )
 
+    # Copy files
+    _upload_files(sbx, sandbox_files)
+
     # Copy code to disk
     if isinstance(artifact.code, list):
         for file in artifact.code:
@@ -126,6 +136,19 @@ async def create_sandbox(request: Request):
         ).to_response()
 
 
+def _upload_files(
+    sandbox: Union[CodeInterpreter, Sandbox],
+    sandbox_files: List[str] = [],
+) -> None:
+    for file_path in sandbox_files:
+        file_name = os.path.basename(file_path)
+        local_file_path = f"output/uploaded/{file_name}"
+        with open(local_file_path, "rb") as f:
+            content = f.read()
+            sandbox.files.write(file_path, content)
+    return None
+
+
 def _download_cell_results(cell_results: Optional[List]) -> List[Dict[str, str]]:
     """
     To pull results from code interpreter cell and save them to disk for serving
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index 7e96c9274..6edbd9e00 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -15,6 +15,7 @@
 from app.api.routers.vercel_response import VercelStreamResponse
 from app.engine.engine import get_chat_engine
 from app.engine.query_filter import generate_filters
+from app.engine.tools import ToolFactory
 
 chat_router = r = APIRouter()
 
@@ -28,8 +29,13 @@ async def chat(
     data: ChatData,
     background_tasks: BackgroundTasks,
 ):
+    # Check is code interpreter is enabled
+    tools = ToolFactory.from_env(map_result=True)
+    only_file_metadata = False
+    if "interpreter" in tools or "artifact" in tools:
+        only_file_metadata = True
     try:
-        last_message_content = data.get_last_message_content()
+        last_message_content = data.get_last_message_content(only_file_metadata)
         messages = data.get_history_messages()
 
         doc_ids = data.get_chat_document_ids()
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 17c63e59c..044bf0da5 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -1,6 +1,7 @@
 import logging
 import os
-from typing import Any, Dict, List, Literal, Optional, Union
+from textwrap import dedent
+from typing import Any, Dict, List, Literal, Optional
 
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.schema import NodeWithScore
@@ -19,12 +20,71 @@ class FileContent(BaseModel):
     value: str | List[str]
 
 
+class FileMetadata(BaseModel):
+    id: str
+    name: str
+    url: Optional[str] = None
+    refs: Optional[List[str]] = None
+
+    def to_llm_content(self) -> str:
+        """
+        Construct content for LLM from the file metadata
+        """
+        default_content = dedent(
+            f"""
+            ====={self.name}=====\n
+            """
+        )
+        if self.url is not None:
+            default_content += f"File URL: {self.url}\n"
+        else:
+            # Construct url from file name
+            url = f"https://{os.getenv('FILESERVER_URL_PREFIX')}/output/uploaded/{self.name}"
+            default_content += (
+                f"File URL (instruction: do not update this file URL yourself): {url}\n"
+            )
+        if self.refs is not None:
+            default_content += f"Document IDs: {self.refs}\n"
+        # construct additional metadata for code interpreter
+        sandbox_file_path = f"/tmp/{self.name}"
+        default_content += f"Sandbox file path: {sandbox_file_path}\n"
+        return default_content
+
+
 class File(BaseModel):
     id: str
-    content: FileContent
-    filename: str
-    filesize: int
     filetype: str
+    metadata: FileMetadata
+
+    def _load_file_content(self) -> str:
+        file_path = f"output/uploaded/{self.metadata.name}"
+        with open(file_path, "r") as file:
+            return file.read()
+
+    def to_llm_content(
+        self,
+        only_file_metadata: bool = False,
+        ignore_unsupported_filetype: bool = True,
+    ) -> str:
+        """
+        Construct content for LLM from the file
+        Args:
+            ignore_unsupported_filetype: If True, ignore the file type that is not supported
+        Returns:
+            The content for LLM
+        """
+        llm_content = ""
+        llm_content += self.metadata.to_llm_content()
+        if not only_file_metadata:
+            if self.filetype == "csv" or self.filetype == "txt":
+                file_content = self._load_file_content()
+                llm_content += f"Content:\n{file_content}\n"
+            else:
+                if ignore_unsupported_filetype:
+                    return f"Content:\nCould not load content for this file because the file type {self.filetype} is not supported"
+                else:
+                    raise ValueError(f"Unsupported file type: {self.filetype}")
+        return llm_content
 
 
 class AnnotationFileData(BaseModel):
@@ -62,24 +122,24 @@ class ArtifactAnnotation(BaseModel):
 
 class Annotation(BaseModel):
     type: str
-    data: Union[AnnotationFileData, List[str], AgentAnnotation, ArtifactAnnotation]
+    data: AnnotationFileData | List[str] | AgentAnnotation | ArtifactAnnotation
 
-    def to_content(self) -> Optional[str]:
+    def to_content(self, only_file_metadata: bool = False) -> str | None:
+        # Note: This code only handles files that were not indexed in the vector database
+        # (i.e., files not uploaded through the upload file API)
         if self.type == "document_file":
-            if isinstance(self.data, AnnotationFileData):
-                # We only support generating context content for CSV files for now
-                csv_files = [file for file in self.data.files if file.filetype == "csv"]
-                if len(csv_files) > 0:
-                    return "Use data from following CSV raw content\n" + "\n".join(
-                        [
-                            f"```csv\n{csv_file.content.value}\n```"
-                            for csv_file in csv_files
-                        ]
-                    )
-            else:
-                logger.warning(
-                    f"Unexpected data type for document_file annotation: {type(self.data)}"
+            assert isinstance(self.data, AnnotationFileData)
+            # We only support generating context content for CSV files for now
+            # iterate through all files and construct content for LLM
+            file_contents = [
+                file.to_llm_content(only_file_metadata) for file in self.data.files
+            ]
+            if len(file_contents) > 0:
+                return "Use data from following files content\n" + "\n".join(
+                    file_contents
                 )
+        elif self.type == "image":
+            raise NotImplementedError("Use image file is not supported yet!")
         else:
             logger.warning(
                 f"The annotation {self.type} is not supported for generating context content"
@@ -115,10 +175,15 @@ def messages_must_not_be_empty(cls, v):
             raise ValueError("Messages must not be empty")
         return v
 
-    def get_last_message_content(self) -> str:
+    def get_last_message_content(
+        self,
+        only_file_metadata: bool = False,
+    ) -> str:
         """
         Get the content of the last message along with the data content if available.
         Fallback to use data content from previous messages
+        Args:
+            only_file_metadata: Either include the uploaded file content or not. If false, only file metadata will be included which is useful for code interpreter tool
         """
         if len(self.messages) == 0:
             raise ValueError("There is not any message in the chat")
@@ -128,7 +193,10 @@ def get_last_message_content(self) -> str:
             if message.role == MessageRole.USER and message.annotations is not None:
                 annotation_contents = filter(
                     None,
-                    [annotation.to_content() for annotation in message.annotations],
+                    [
+                        annotation.to_content(only_file_metadata)
+                        for annotation in message.annotations
+                    ],
                 )
                 if not annotation_contents:
                     continue
@@ -175,7 +243,12 @@ def _get_latest_code_artifact(self) -> Optional[str]:
                     ):
                         tool_output = annotation.data.toolOutput
                         if tool_output and not tool_output.get("isError", False):
-                            return tool_output.get("output", {}).get("code", None)
+                            print("tool_output", tool_output)
+                            output = tool_output.get("output", {})
+                            if isinstance(output, dict) and output.get("code"):
+                                return output.get("code")
+                            else:
+                                return None
         return None
 
     def get_history_messages(
@@ -221,14 +294,24 @@ def get_chat_document_ids(self) -> List[str]:
                 for annotation in message.annotations:
                     if (
                         annotation.type == "document_file"
-                        and isinstance(annotation.data, AnnotationFileData)
                         and annotation.data.files is not None
                     ):
                         for fi in annotation.data.files:
-                            if fi.content.type == "ref":
-                                document_ids += fi.content.value
+                            if fi.metadata.refs is not None:
+                                document_ids += fi.metadata.refs
         return list(set(document_ids))
 
+    def get_uploaded_files(self) -> List[Dict[str, Any]]:
+        """
+        Get the uploaded files from the chat data
+        """
+        for message in self.messages:
+            if message.role == MessageRole.USER and message.annotations is not None:
+                for annotation in message.annotations:
+                    if annotation.type == "document_file":
+                        return [file.model_dump() for file in annotation.data.files]
+        return []
+
 
 class SourceNodes(BaseModel):
     id: str
@@ -251,7 +334,7 @@ def from_source_node(cls, source_node: NodeWithScore):
         )
 
     @classmethod
-    def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> Optional[str]:
+    def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> str:
         url_prefix = os.getenv("FILESERVER_URL_PREFIX")
         if not url_prefix:
             logger.warning(
diff --git a/templates/types/streaming/fastapi/app/api/routers/upload.py b/templates/types/streaming/fastapi/app/api/routers/upload.py
index ccc03004b..5c6b985c9 100644
--- a/templates/types/streaming/fastapi/app/api/routers/upload.py
+++ b/templates/types/streaming/fastapi/app/api/routers/upload.py
@@ -1,8 +1,9 @@
 import logging
-from typing import List, Any
+import uuid
+from typing import Any, Dict
 
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from app.api.services.file import PrivateFileService
 
@@ -12,18 +13,23 @@
 
 
 class FileUploadRequest(BaseModel):
+    id: str = Field(
+        default_factory=lambda: str(uuid.uuid4()),
+        description="The session id (optional, if not provided, a new session will be created)",
+    )
     base64: str
     filename: str
     params: Any = None
 
 
 @r.post("")
-def upload_file(request: FileUploadRequest) -> List[str]:
+def upload_file(request: FileUploadRequest) -> Dict[str, Any]:
     try:
-        logger.info("Processing file")
-        return PrivateFileService.process_file(
+        logger.info(f"Processing file for session {request.id}")
+        file_meta = PrivateFileService.process_file(
             request.filename, request.base64, request.params
         )
+        return file_meta.to_upload_response()
     except Exception as e:
         logger.error(f"Error processing file: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail="Error processing file")
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts b/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
index cc49169ac..2aa9776e2 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
@@ -2,12 +2,12 @@
 
 import { JSONValue } from "llamaindex";
 import { useState } from "react";
-import { v4 as uuidv4 } from "uuid";
 import {
   DocumentFile,
   DocumentFileType,
   MessageAnnotation,
   MessageAnnotationType,
+  UploadedFileMeta,
 } from "..";
 import { useClientConfig } from "./use-config";
 
@@ -51,7 +51,7 @@ export function useFile() {
   const uploadContent = async (
     file: File,
     requestParams: any = {},
-  ): Promise<string[]> => {
+  ): Promise<UploadedFileMeta> => {
     const base64 = await readContent({ file, asUrl: true });
     const uploadAPI = `${backend}/api/chat/upload`;
     const response = await fetch(uploadAPI, {
@@ -66,7 +66,7 @@ export function useFile() {
       }),
     });
     if (!response.ok) throw new Error("Failed to upload document.");
-    return await response.json();
+    return (await response.json()) as UploadedFileMeta;
   };
 
   const getAnnotations = () => {
@@ -112,34 +112,19 @@ export function useFile() {
 
     const filetype = docMineTypeMap[file.type];
     if (!filetype) throw new Error("Unsupported document type.");
-    const newDoc: Omit<DocumentFile, "content"> = {
-      id: uuidv4(),
-      filetype,
+    const uploadedFileMeta = await uploadContent(file, requestParams);
+    const newDoc: DocumentFile = {
+      id: uploadedFileMeta.id,
       filename: file.name,
       filesize: file.size,
+      filetype,
+      content: {
+        type: "ref",
+        value: uploadedFileMeta.refs || [],
+      },
+      metadata: uploadedFileMeta,
     };
-    switch (file.type) {
-      case "text/csv": {
-        const content = await readContent({ file });
-        return addDoc({
-          ...newDoc,
-          content: {
-            type: "text",
-            value: content,
-          },
-        });
-      }
-      default: {
-        const ids = await uploadContent(file, requestParams);
-        return addDoc({
-          ...newDoc,
-          content: {
-            type: "ref",
-            value: ids,
-          },
-        });
-      }
-    }
+    return addDoc(newDoc);
   };
 
   return {
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
index a9aba73e5..8ce8198d6 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
@@ -26,12 +26,20 @@ export type DocumentFileContent = {
   value: string[] | string;
 };
 
-export type DocumentFile = {
+export type UploadedFileMeta = {
   id: string;
+  name: string;
+  url?: string;
+  refs?: string[];
+};
+
+export type DocumentFile = {
+  id?: string;
   filename: string;
   filesize: number;
   filetype: DocumentFileType;
   content: DocumentFileContent;
+  metadata?: UploadedFileMeta;
 };
 
 export type DocumentFileData = {
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
index 6f808efd9..ec1102bf4 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
@@ -26,6 +26,7 @@ export type CodeArtifact = {
   port: number | null;
   file_path: string;
   code: string;
+  files?: string[];
 };
 
 type ArtifactResult = {

From 1e2502a88bb72749f704521b7652505019ee5c7a Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 13:01:40 +0700
Subject: [PATCH 02/32] update private file handler

---
 templates/components/services/python/file.py  | 144 ++++++++++++------
 .../fastapi/app/api/routers/upload.py         |  14 +-
 .../fastapi/app/engine/utils/file_helper.py   |  12 +-
 3 files changed, 114 insertions(+), 56 deletions(-)

diff --git a/templates/components/services/python/file.py b/templates/components/services/python/file.py
index d52671818..6c73cec54 100644
--- a/templates/components/services/python/file.py
+++ b/templates/components/services/python/file.py
@@ -1,11 +1,14 @@
 import base64
 import mimetypes
 import os
+import re
+import uuid
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional, Tuple
 
 from app.engine.index import IndexConfig, get_index
+from app.engine.utils.file_helper import FileMetadata, save_file
 from llama_index.core import VectorStoreIndex
 from llama_index.core.ingestion import IngestionPipeline
 from llama_index.core.readers.file.base import (
@@ -31,14 +34,19 @@ def get_llamaparse_parser():
 def default_file_loaders_map():
     default_loaders = get_file_loaders_map()
     default_loaders[".txt"] = FlatReader
+    default_loaders[".csv"] = FlatReader
     return default_loaders
 
 
 class PrivateFileService:
+    """
+    To store the files uploaded by the user and add them to the index.
+    """
+
     PRIVATE_STORE_PATH = "output/uploaded"
 
     @staticmethod
-    def preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
+    def _preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
         header, data = base64_content.split(",", 1)
         mime_type = header.split(";")[0].split(":", 1)[1]
         extension = mimetypes.guess_extension(mime_type)
@@ -46,25 +54,35 @@ def preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
         return base64.b64decode(data), extension
 
     @staticmethod
-    def store_and_parse_file(file_name, file_data, extension) -> List[Document]:
+    def _store_file(file_name, file_data) -> FileMetadata:
+        """
+        Store the file to the private directory and return the file metadata
+        """
         # Store file to the private directory
         os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
         file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))
 
-        # write file
-        with open(file_path, "wb") as f:
-            f.write(file_data)
+        return save_file(file_data, file_path=file_path)
+
+    @staticmethod
+    def _load_file_to_documents(file_metadata: FileMetadata) -> List[Document]:
+        """
+        Load the file from the private directory and return the documents
+        """
+        file_path = file_metadata.outputPath
+        file_name = file_metadata.filename
+        extension = file_path.split(".")[-1]
 
         # Load file to documents
         # If LlamaParse is enabled, use it to parse the file
         # Otherwise, use the default file loaders
         reader = get_llamaparse_parser()
         if reader is None:
-            reader_cls = default_file_loaders_map().get(extension)
+            reader_cls = default_file_loaders_map().get(f".{extension}")
             if reader_cls is None:
                 raise ValueError(f"File extension {extension} is not supported")
             reader = reader_cls()
-        documents = reader.load_data(file_path)
+        documents = reader.load_data(Path(file_path))
         # Add custom metadata
         for doc in documents:
             doc.metadata["file_name"] = file_name
@@ -72,53 +90,85 @@ def store_and_parse_file(file_name, file_data, extension) -> List[Document]:
         return documents
 
     @staticmethod
+    def _add_documents_to_vector_store_index(
+        documents: List[Document], index: VectorStoreIndex
+    ) -> None:
+        """
+        Add the documents to the vector store index
+        """
+        pipeline = IngestionPipeline()
+        nodes = pipeline.run(documents=documents)
+
+        # Add the nodes to the index and persist it
+        if index is None:
+            index = VectorStoreIndex(nodes=nodes)
+        else:
+            index.insert_nodes(nodes=nodes)
+        index.storage_context.persist(
+            persist_dir=os.environ.get("STORAGE_DIR", "storage")
+        )
+
+    @staticmethod
+    def _add_file_to_llama_cloud_index(
+        index: LlamaCloudIndex,
+        file_name: str,
+        file_data: bytes,
+    ) -> None:
+        """
+        Add the file to the LlamaCloud index.
+        LlamaCloudIndex is a managed index so we can directly use the files.
+        """
+        try:
+            from app.engine.service import LLamaCloudFileService
+        except ImportError:
+            raise ValueError("LlamaCloudFileService is not found")
+
+        project_id = index._get_project_id()
+        pipeline_id = index._get_pipeline_id()
+        # LlamaCloudIndex is a managed index so we can directly use the files
+        upload_file = (file_name, BytesIO(file_data))
+        return [
+            LLamaCloudFileService.add_file_to_pipeline(
+                project_id,
+                pipeline_id,
+                upload_file,
+                custom_metadata={},
+            )
+        ]
+
+    @staticmethod
+    def _sanitize_file_name(file_name: str) -> str:
+        file_name, extension = os.path.splitext(file_name)
+        return re.sub(r"[^a-zA-Z0-9]", "_", file_name) + extension
+
+    @classmethod
     def process_file(
-        file_name: str, base64_content: str, params: Optional[dict] = None
-    ) -> List[str]:
+        cls,
+        file_name: str,
+        base64_content: str,
+        params: Optional[dict] = None,
+    ) -> FileMetadata:
         if params is None:
             params = {}
 
-        file_data, extension = PrivateFileService.preprocess_base64_file(base64_content)
-
         # Add the nodes to the index and persist it
         index_config = IndexConfig(**params)
-        current_index = get_index(index_config)
+        index = get_index(index_config)
 
-        # Insert the documents into the index
-        if isinstance(current_index, LlamaCloudIndex):
-            from app.engine.service import LLamaCloudFileService
+        # Generate a new file name if the same file is uploaded multiple times
+        file_id = str(uuid.uuid4())
+        new_file_name = f"{file_id}_{cls._sanitize_file_name(file_name)}"
+
+        # Preprocess and store the file
+        file_data, extension = cls._preprocess_base64_file(base64_content)
+        file_metadata = cls._store_file(new_file_name, file_data)
 
-            project_id = current_index._get_project_id()
-            pipeline_id = current_index._get_pipeline_id()
-            # LlamaCloudIndex is a managed index so we can directly use the files
-            upload_file = (file_name, BytesIO(file_data))
-            return [
-                LLamaCloudFileService.add_file_to_pipeline(
-                    project_id,
-                    pipeline_id,
-                    upload_file,
-                    custom_metadata={
-                        # Set private=true to mark the document as private user docs (required for filtering)
-                        "private": "true",
-                    },
-                )
-            ]
+        # Insert the file into the index
+        if isinstance(index, LlamaCloudIndex):
+            _ = cls._add_file_to_llama_cloud_index(index, new_file_name, file_data)
         else:
-            # First process documents into nodes
-            documents = PrivateFileService.store_and_parse_file(
-                file_name, file_data, extension
-            )
-            pipeline = IngestionPipeline()
-            nodes = pipeline.run(documents=documents)
-
-            # Add the nodes to the index and persist it
-            if current_index is None:
-                current_index = VectorStoreIndex(nodes=nodes)
-            else:
-                current_index.insert_nodes(nodes=nodes)
-            current_index.storage_context.persist(
-                persist_dir=os.environ.get("STORAGE_DIR", "storage")
-            )
+            documents = cls._load_file_to_documents(file_metadata)
+            cls._add_documents_to_vector_store_index(documents, index)
 
-            # Return the document ids
-            return [doc.doc_id for doc in documents]
+        # Return the file metadata
+        return file_metadata
diff --git a/templates/types/streaming/fastapi/app/api/routers/upload.py b/templates/types/streaming/fastapi/app/api/routers/upload.py
index 5c6b985c9..78aff33cd 100644
--- a/templates/types/streaming/fastapi/app/api/routers/upload.py
+++ b/templates/types/streaming/fastapi/app/api/routers/upload.py
@@ -1,9 +1,8 @@
 import logging
-import uuid
 from typing import Any, Dict
 
 from fastapi import APIRouter, HTTPException
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
 
 from app.api.services.file import PrivateFileService
 
@@ -13,10 +12,6 @@
 
 
 class FileUploadRequest(BaseModel):
-    id: str = Field(
-        default_factory=lambda: str(uuid.uuid4()),
-        description="The session id (optional, if not provided, a new session will be created)",
-    )
     base64: str
     filename: str
     params: Any = None
@@ -24,8 +19,13 @@ class FileUploadRequest(BaseModel):
 
 @r.post("")
 def upload_file(request: FileUploadRequest) -> Dict[str, Any]:
+    """
+    To upload a private file from the chat UI.
+    Returns:
+        The metadata of the uploaded file.
+    """
     try:
-        logger.info(f"Processing file for session {request.id}")
+        logger.info(f"Processing file: {request.filename}")
         file_meta = PrivateFileService.process_file(
             request.filename, request.base64, request.params
         )
diff --git a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
index c794a3b47..6452cdf85 100644
--- a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
+++ b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
@@ -1,7 +1,7 @@
 import logging
 import os
 import uuid
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from pydantic import BaseModel
 
@@ -13,6 +13,14 @@ class FileMetadata(BaseModel):
     filename: str
     url: str
 
+    def to_upload_response(self) -> Dict[str, Any]:
+        file_id = self.outputPath.split("/")[-1]
+        return {
+            "id": file_id,
+            "name": self.filename,
+            "url": self.url,
+        }
+
 
 def save_file(
     content: bytes | str,
@@ -58,7 +66,7 @@ def save_file(
     logger.info(f"Saved file to {file_path}")
 
     return FileMetadata(
-        outputPath=file_path,
+        outputPath=file_path if isinstance(file_path, str) else str(file_path),
         filename=file_name,
         url=f"{os.getenv('FILESERVER_URL_PREFIX')}/{file_path}",
     )

From e12bd298b55cd95388ecf9a247a23d38346f4023 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 14:47:40 +0700
Subject: [PATCH 03/32] enhance code

---
 .../engines/python/agent/tools/artifact.py          |  8 ++++----
 templates/components/routers/python/sandbox.py      | 13 ++++++++-----
 .../app/components/ui/chat/widgets/Artifact.tsx     |  6 +++---
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/artifact.py b/templates/components/engines/python/agent/tools/artifact.py
index 70abb96d4..6c16012e8 100644
--- a/templates/components/engines/python/agent/tools/artifact.py
+++ b/templates/components/engines/python/agent/tools/artifact.py
@@ -72,15 +72,15 @@ def artifact(
         sandbox_files: Optional[List[str]] = None,
         old_code: Optional[str] = None,
     ) -> Dict:
-        """Generate a code artifact based on the input.
+        """Generate a code artifact based on the provided input.
 
         Args:
-            query (str): The description of the application you want to build.
-            sandbox_files (Optional[List[str]], optional): The list of sandbox file paths. Defaults to None.
+            query (str): A description of the application you want to build.
+            sandbox_files (Optional[List[str]], optional): A list of sandbox file paths. Defaults to None. Include these files if the code requires them.
             old_code (Optional[str], optional): The existing code to be modified. Defaults to None.
 
         Returns:
-            Dict: A dictionary containing the generated artifact information.
+            Dict: A dictionary containing information about the generated artifact.
         """
 
         if old_code:
diff --git a/templates/components/routers/python/sandbox.py b/templates/components/routers/python/sandbox.py
index 2498a5fc8..c754e42c4 100644
--- a/templates/components/routers/python/sandbox.py
+++ b/templates/components/routers/python/sandbox.py
@@ -16,7 +16,8 @@
 import logging
 import os
 import uuid
-from typing import Dict, List, Optional, Union
+from dataclasses import asdict
+from typing import Any, Dict, List, Optional, Union
 
 from app.engine.tools.artifact import CodeArtifact
 from app.engine.utils.file_helper import save_file
@@ -36,7 +37,7 @@ class ExecutionResult(BaseModel):
     template: str
     stdout: List[str]
     stderr: List[str]
-    runtime_error: Optional[Dict[str, Union[str, List[str]]]] = None
+    runtime_error: Optional[Dict[str, Any]] = None
     output_urls: List[Dict[str, str]]
     url: Optional[str]
 
@@ -63,7 +64,7 @@ class FileUpload(BaseModel):
 async def create_sandbox(request: Request):
     request_data = await request.json()
     artifact_data = request_data["artifact"]
-    sandbox_files = artifact_data["files"]
+    sandbox_files = artifact_data.get("files", [])
 
     try:
         artifact = CodeArtifact(**artifact_data)
@@ -102,7 +103,8 @@ async def create_sandbox(request: Request):
             )
 
     # Copy files
-    _upload_files(sbx, sandbox_files)
+    if len(sandbox_files) > 0:
+        _upload_files(sbx, sandbox_files)
 
     # Copy code to disk
     if isinstance(artifact.code, list):
@@ -117,11 +119,12 @@ async def create_sandbox(request: Request):
     if artifact.template == "code-interpreter-multilang":
         result = sbx.notebook.exec_cell(artifact.code or "")
         output_urls = _download_cell_results(result.results)
+        runtime_error = asdict(result.error) if result.error else None
         return ExecutionResult(
             template=artifact.template,
             stdout=result.logs.stdout,
             stderr=result.logs.stderr,
-            runtime_error=result.error,
+            runtime_error=runtime_error,
             output_urls=output_urls,
             url=None,
         ).to_response()
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
index ec1102bf4..fa2a60592 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/widgets/Artifact.tsx
@@ -202,10 +202,10 @@ function ArtifactOutput({
 function RunTimeError({
   runtimeError,
 }: {
-  runtimeError: { name: string; value: string; tracebackRaw: string[] };
+  runtimeError: { name: string; value: string; tracebackRaw?: string[] };
 }) {
   const { isCopied, copyToClipboard } = useCopyToClipboard({ timeout: 1000 });
-  const contentToCopy = `Fix this error:\n${runtimeError.name}\n${runtimeError.value}\n${runtimeError.tracebackRaw.join("\n")}`;
+  const contentToCopy = `Fix this error:\n${runtimeError.name}\n${runtimeError.value}\n${runtimeError.tracebackRaw?.join("\n")}`;
   return (
     <Collapsible className="bg-red-100 text-red-800 rounded-md py-2 px-4 space-y-4">
       <CollapsibleTrigger className="font-bold w-full text-start flex items-center justify-between">
@@ -216,7 +216,7 @@ function RunTimeError({
         <div className="flex flex-col gap-2">
           <p className="font-semibold">{runtimeError.name}</p>
           <p>{runtimeError.value}</p>
-          {runtimeError.tracebackRaw.map((trace, index) => (
+          {runtimeError.tracebackRaw?.map((trace, index) => (
             <pre key={index} className="whitespace-pre-wrap text-sm mb-2">
               {trace}
             </pre>

From 1cef23c0c211d683dc2480cd64a98a7518592bad Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 15:28:12 +0700
Subject: [PATCH 04/32] reduce complexity

---
 templates/components/services/python/file.py  | 10 +--
 .../streaming/fastapi/app/api/routers/chat.py |  8 +--
 .../fastapi/app/api/routers/models.py         | 67 +++----------------
 .../fastapi/app/engine/utils/file_helper.py   | 35 +++++++---
 4 files changed, 41 insertions(+), 79 deletions(-)

diff --git a/templates/components/services/python/file.py b/templates/components/services/python/file.py
index 6c73cec54..726148e7c 100644
--- a/templates/components/services/python/file.py
+++ b/templates/components/services/python/file.py
@@ -69,9 +69,7 @@ def _load_file_to_documents(file_metadata: FileMetadata) -> List[Document]:
         """
         Load the file from the private directory and return the documents
         """
-        file_path = file_metadata.outputPath
-        file_name = file_metadata.filename
-        extension = file_path.split(".")[-1]
+        extension = file_metadata.name.split(".")[-1]
 
         # Load file to documents
         # If LlamaParse is enabled, use it to parse the file
@@ -82,10 +80,10 @@ def _load_file_to_documents(file_metadata: FileMetadata) -> List[Document]:
             if reader_cls is None:
                 raise ValueError(f"File extension {extension} is not supported")
             reader = reader_cls()
-        documents = reader.load_data(Path(file_path))
+        documents = reader.load_data(Path(file_metadata.path))
         # Add custom metadata
         for doc in documents:
-            doc.metadata["file_name"] = file_name
+            doc.metadata["file_name"] = file_metadata.name
             doc.metadata["private"] = "true"
         return documents
 
@@ -169,6 +167,8 @@ def process_file(
         else:
             documents = cls._load_file_to_documents(file_metadata)
             cls._add_documents_to_vector_store_index(documents, index)
+            # Add document ids to the file metadata
+            file_metadata.document_ids = [doc.doc_id for doc in documents]
 
         # Return the file metadata
         return file_metadata
diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index 6edbd9e00..7e96c9274 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -15,7 +15,6 @@
 from app.api.routers.vercel_response import VercelStreamResponse
 from app.engine.engine import get_chat_engine
 from app.engine.query_filter import generate_filters
-from app.engine.tools import ToolFactory
 
 chat_router = r = APIRouter()
 
@@ -29,13 +28,8 @@ async def chat(
     data: ChatData,
     background_tasks: BackgroundTasks,
 ):
-    # Check is code interpreter is enabled
-    tools = ToolFactory.from_env(map_result=True)
-    only_file_metadata = False
-    if "interpreter" in tools or "artifact" in tools:
-        only_file_metadata = True
     try:
-        last_message_content = data.get_last_message_content(only_file_metadata)
+        last_message_content = data.get_last_message_content()
         messages = data.get_history_messages()
 
         doc_ids = data.get_chat_document_ids()
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 044bf0da5..b759b90f9 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -1,6 +1,5 @@
 import logging
 import os
-from textwrap import dedent
 from typing import Any, Dict, List, Literal, Optional
 
 from llama_index.core.llms import ChatMessage, MessageRole
@@ -30,11 +29,7 @@ def to_llm_content(self) -> str:
         """
         Construct content for LLM from the file metadata
         """
-        default_content = dedent(
-            f"""
-            ====={self.name}=====\n
-            """
-        )
+        default_content = f"=====File: {self.name}=====\n"
         if self.url is not None:
             default_content += f"File URL: {self.url}\n"
         else:
@@ -47,7 +42,7 @@ def to_llm_content(self) -> str:
             default_content += f"Document IDs: {self.refs}\n"
         # construct additional metadata for code interpreter
         sandbox_file_path = f"/tmp/{self.name}"
-        default_content += f"Sandbox file path: {sandbox_file_path}\n"
+        default_content += f"Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): {sandbox_file_path}\n"
         return default_content
 
 
@@ -61,31 +56,6 @@ def _load_file_content(self) -> str:
         with open(file_path, "r") as file:
             return file.read()
 
-    def to_llm_content(
-        self,
-        only_file_metadata: bool = False,
-        ignore_unsupported_filetype: bool = True,
-    ) -> str:
-        """
-        Construct content for LLM from the file
-        Args:
-            ignore_unsupported_filetype: If True, ignore the file type that is not supported
-        Returns:
-            The content for LLM
-        """
-        llm_content = ""
-        llm_content += self.metadata.to_llm_content()
-        if not only_file_metadata:
-            if self.filetype == "csv" or self.filetype == "txt":
-                file_content = self._load_file_content()
-                llm_content += f"Content:\n{file_content}\n"
-            else:
-                if ignore_unsupported_filetype:
-                    return f"Content:\nCould not load content for this file because the file type {self.filetype} is not supported"
-                else:
-                    raise ValueError(f"Unsupported file type: {self.filetype}")
-        return llm_content
-
 
 class AnnotationFileData(BaseModel):
     files: List[File] = Field(
@@ -124,16 +94,14 @@ class Annotation(BaseModel):
     type: str
     data: AnnotationFileData | List[str] | AgentAnnotation | ArtifactAnnotation
 
-    def to_content(self, only_file_metadata: bool = False) -> str | None:
+    def to_content(self) -> str | None:
         # Note: This code only handles files that were not indexed in the vector database
         # (i.e., files not uploaded through the upload file API)
         if self.type == "document_file":
             assert isinstance(self.data, AnnotationFileData)
             # We only support generating context content for CSV files for now
             # iterate through all files and construct content for LLM
-            file_contents = [
-                file.to_llm_content(only_file_metadata) for file in self.data.files
-            ]
+            file_contents = [file.metadata.to_llm_content() for file in self.data.files]
             if len(file_contents) > 0:
                 return "Use data from following files content\n" + "\n".join(
                     file_contents
@@ -175,15 +143,10 @@ def messages_must_not_be_empty(cls, v):
             raise ValueError("Messages must not be empty")
         return v
 
-    def get_last_message_content(
-        self,
-        only_file_metadata: bool = False,
-    ) -> str:
+    def get_last_message_content(self) -> str:
         """
         Get the content of the last message along with the data content if available.
         Fallback to use data content from previous messages
-        Args:
-            only_file_metadata: Either include the uploaded file content or not. If false, only file metadata will be included which is useful for code interpreter tool
         """
         if len(self.messages) == 0:
             raise ValueError("There is not any message in the chat")
@@ -193,10 +156,7 @@ def get_last_message_content(
             if message.role == MessageRole.USER and message.annotations is not None:
                 annotation_contents = filter(
                     None,
-                    [
-                        annotation.to_content(only_file_metadata)
-                        for annotation in message.annotations
-                    ],
+                    [annotation.to_content() for annotation in message.annotations],
                 )
                 if not annotation_contents:
                     continue
@@ -289,16 +249,11 @@ def get_chat_document_ids(self) -> List[str]:
         Get the document IDs from the chat messages
         """
         document_ids: List[str] = []
-        for message in self.messages:
-            if message.role == MessageRole.USER and message.annotations is not None:
-                for annotation in message.annotations:
-                    if (
-                        annotation.type == "document_file"
-                        and annotation.data.files is not None
-                    ):
-                        for fi in annotation.data.files:
-                            if fi.metadata.refs is not None:
-                                document_ids += fi.metadata.refs
+        uploaded_files = self.get_uploaded_files()
+        for _file in uploaded_files:
+            file_metadata = _file.get("metadata", {})
+            refs = file_metadata.get("refs", [])
+            document_ids.extend(refs)
         return list(set(document_ids))
 
     def get_uploaded_files(self) -> List[Dict[str, Any]]:
diff --git a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
index 6452cdf85..cc9b07a19 100644
--- a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
+++ b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
@@ -1,25 +1,38 @@
 import logging
 import os
 import uuid
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field, computed_field
 
 logger = logging.getLogger(__name__)
 
 
 class FileMetadata(BaseModel):
-    outputPath: str
-    filename: str
-    url: str
+    path: str = Field(..., description="The stored path of the file")
+    name: str = Field(..., description="The name of the file")
+    url: str = Field(..., description="The URL of the file")
+    document_ids: Optional[List[str]] = Field(
+        None, description="The indexed document IDs that the file is referenced to"
+    )
+
+    @computed_field
+    @property
+    def file_id(self) -> Optional[str]:
+        file_els = self.name.split("_", maxsplit=1)
+        if len(file_els) == 2:
+            return file_els[0]
+        return None
 
     def to_upload_response(self) -> Dict[str, Any]:
-        file_id = self.outputPath.split("/")[-1]
-        return {
-            "id": file_id,
-            "name": self.filename,
+        response = {
+            "id": self.file_id,
+            "name": self.name,
             "url": self.url,
         }
+        if self.document_ids is not None:
+            response["refs"] = self.document_ids
+        return response
 
 
 def save_file(
@@ -66,7 +79,7 @@ def save_file(
     logger.info(f"Saved file to {file_path}")
 
     return FileMetadata(
-        outputPath=file_path if isinstance(file_path, str) else str(file_path),
-        filename=file_name,
+        path=file_path if isinstance(file_path, str) else str(file_path),
+        name=file_name,
         url=f"{os.getenv('FILESERVER_URL_PREFIX')}/{file_path}",
     )

From 5bd3591c919f5ca67285a55e78d0bcf14dc0fdec Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 15:52:20 +0700
Subject: [PATCH 05/32] fix mypy

---
 templates/components/services/python/file.py                   | 2 +-
 templates/types/streaming/fastapi/app/api/routers/models.py    | 3 +--
 .../types/streaming/fastapi/app/engine/utils/file_helper.py    | 1 -
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/templates/components/services/python/file.py b/templates/components/services/python/file.py
index 726148e7c..0382ba0fa 100644
--- a/templates/components/services/python/file.py
+++ b/templates/components/services/python/file.py
@@ -62,7 +62,7 @@ def _store_file(file_name, file_data) -> FileMetadata:
         os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
         file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))
 
-        return save_file(file_data, file_path=file_path)
+        return save_file(file_data, file_path=str(file_path))
 
     @staticmethod
     def _load_file_to_documents(file_metadata: FileMetadata) -> List[Document]:
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index b759b90f9..0a5fa2afe 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -97,8 +97,7 @@ class Annotation(BaseModel):
     def to_content(self) -> str | None:
         # Note: This code only handles files that were not indexed in the vector database
         # (i.e., files not uploaded through the upload file API)
-        if self.type == "document_file":
-            assert isinstance(self.data, AnnotationFileData)
+        if self.type == "document_file" and isinstance(self.data, AnnotationFileData):
             # We only support generating context content for CSV files for now
             # iterate through all files and construct content for LLM
             file_contents = [file.metadata.to_llm_content() for file in self.data.files]
diff --git a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
index cc9b07a19..df5ccf313 100644
--- a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
+++ b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
@@ -17,7 +17,6 @@ class FileMetadata(BaseModel):
     )
 
     @computed_field
-    @property
     def file_id(self) -> Optional[str]:
         file_els = self.name.split("_", maxsplit=1)
         if len(file_els) == 2:

From c8a9472b9aad5be9fea7c20e91aa1057c68bf5a2 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 16:11:35 +0700
Subject: [PATCH 06/32] fix mypy

---
 .../components/engines/python/agent/tools/artifact.py    | 9 +++++----
 templates/components/routers/python/sandbox.py           | 2 +-
 .../types/streaming/fastapi/app/api/routers/models.py    | 4 +++-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/artifact.py b/templates/components/engines/python/agent/tools/artifact.py
index 6c16012e8..5506113f9 100644
--- a/templates/components/engines/python/agent/tools/artifact.py
+++ b/templates/components/engines/python/agent/tools/artifact.py
@@ -95,12 +95,13 @@ def artifact(
             ChatMessage(role="user", content=user_message),
         ]
         try:
-            sllm = Settings.llm.as_structured_llm(output_cls=CodeArtifact)
+            sllm = Settings.llm.as_structured_llm(output_cls=CodeArtifact)  # type: ignore
             response = sllm.chat(messages)
-            data: CodeArtifact = response.raw.model_dump()
+            data: CodeArtifact = response.raw
+            data_dict = data.model_dump()
             if sandbox_files:
-                data["files"] = sandbox_files
-            return data
+                data_dict["files"] = sandbox_files
+            return data_dict
         except Exception as e:
             logger.error(f"Failed to generate artifact: {str(e)}")
             raise e
diff --git a/templates/components/routers/python/sandbox.py b/templates/components/routers/python/sandbox.py
index c754e42c4..233602fd4 100644
--- a/templates/components/routers/python/sandbox.py
+++ b/templates/components/routers/python/sandbox.py
@@ -174,7 +174,7 @@ def _download_cell_results(cell_results: Optional[List]) -> List[Dict[str, str]]
                     output.append(
                         {
                             "type": ext,
-                            "filename": file_meta.filename,
+                            "filename": file_meta.name,
                             "url": file_meta.url,
                         }
                     )
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 0a5fa2afe..89ac195ec 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -262,7 +262,9 @@ def get_uploaded_files(self) -> List[Dict[str, Any]]:
         for message in self.messages:
             if message.role == MessageRole.USER and message.annotations is not None:
                 for annotation in message.annotations:
-                    if annotation.type == "document_file":
+                    if annotation.type == "document_file" and isinstance(
+                        annotation.data, AnnotationFileData
+                    ):
                         return [file.model_dump() for file in annotation.data.files]
         return []
 

From 5fd25f6f44173260d98ed0f800a0315763d95125 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Fri, 11 Oct 2024 16:22:55 +0700
Subject: [PATCH 07/32] remove comment

---
 templates/types/streaming/fastapi/app/api/routers/models.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 89ac195ec..538485cf9 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -95,10 +95,7 @@ class Annotation(BaseModel):
     data: AnnotationFileData | List[str] | AgentAnnotation | ArtifactAnnotation
 
     def to_content(self) -> str | None:
-        # Note: This code only handles files that were not indexed in the vector database
-        # (i.e., files not uploaded through the upload file API)
         if self.type == "document_file" and isinstance(self.data, AnnotationFileData):
-            # We only support generating context content for CSV files for now
             # iterate through all files and construct content for LLM
             file_contents = [file.metadata.to_llm_content() for file in self.data.files]
             if len(file_contents) > 0:

From a4d3d36026fa7c578d4dcaf17174631a43686908 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 09:05:59 +0700
Subject: [PATCH 08/32] support upload file and enhance interpreter tool

---
 helpers/tools.ts                              |   2 +-
 .../engines/python/agent/tools/interpreter.py | 136 +++++++++++++-----
 2 files changed, 101 insertions(+), 37 deletions(-)

diff --git a/helpers/tools.ts b/helpers/tools.ts
index 0684a780d..c7349569c 100644
--- a/helpers/tools.ts
+++ b/helpers/tools.ts
@@ -139,7 +139,7 @@ For better results, you can specify the region parameter to get results from a s
     dependencies: [
       {
         name: "e2b_code_interpreter",
-        version: "0.0.10",
+        version: "^0.0.11b38",
       },
     ],
     supportedFrameworks: ["fastapi", "express", "nextjs"],
diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index 5bbadbd75..6f264efc1 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -4,12 +4,13 @@
 import uuid
 from typing import Dict, List, Optional
 
+from app.engine.utils.file_helper import save_file
 from e2b_code_interpreter import CodeInterpreter
 from e2b_code_interpreter.models import Logs
 from llama_index.core.tools import FunctionTool
 from pydantic import BaseModel
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("uvicorn")
 
 
 class InterpreterExtraResult(BaseModel):
@@ -22,11 +23,14 @@ class InterpreterExtraResult(BaseModel):
 class E2BToolOutput(BaseModel):
     is_error: bool
     logs: Logs
+    error_message: Optional[str] = None
     results: List[InterpreterExtraResult] = []
+    retry_count: int = 0
 
 
 class E2BCodeInterpreter:
     output_dir = "output/tools"
+    uploaded_files_dir = "output/uploaded"
 
     def __init__(self, api_key: str = None):
         if api_key is None:
@@ -42,40 +46,52 @@ def __init__(self, api_key: str = None):
             )
 
         self.filesever_url_prefix = filesever_url_prefix
-        self.interpreter = CodeInterpreter(api_key=api_key)
+        self.interpreter = None
+        self.api_key = api_key
 
     def __del__(self):
-        self.interpreter.kill()
-
-    def get_output_path(self, filename: str) -> str:
-        # if output directory doesn't exist, create it
-        if not os.path.exists(self.output_dir):
-            os.makedirs(self.output_dir, exist_ok=True)
-        return os.path.join(self.output_dir, filename)
+        """
+        Kill the interpreter when the tool is no longer in use
+        """
+        if self.interpreter is not None:
+            self.interpreter.kill()
 
-    def save_to_disk(self, base64_data: str, ext: str) -> Dict:
-        filename = f"{uuid.uuid4()}.{ext}"  # generate a unique filename
+    def _init_interpreter(self, sandbox_files: List[str] = []):
+        """
+        Lazily initialize the interpreter.
+        """
+        logger.info(f"Initializing interpreter with {len(sandbox_files)} files")
+        self.interpreter = CodeInterpreter(api_key=self.api_key)
+        if len(sandbox_files) > 0:
+            for file_path in sandbox_files:
+                # The file path is a local path, but sometimes AI passes a sandbox file path
+                # We need to support both
+                file_name = os.path.basename(file_path)
+                if file_path.startswith("/tmp"):
+                    # The file path is a sandbox file path
+                    sandbox_file_path = file_path
+                    local_file_path = os.path.join(self.uploaded_files_dir, file_name)
+                else:
+                    # The file path is a local file path
+                    local_file_path = file_path
+                    sandbox_file_path = f"tmp/{file_name}"
+
+                with open(local_file_path, "rb") as f:
+                    content = f.read()
+                    self.interpreter.files.write(sandbox_file_path, content)
+            logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")
+
+    def _save_to_disk(self, base64_data: str, ext: str) -> Dict:
         buffer = base64.b64decode(base64_data)
-        output_path = self.get_output_path(filename)
 
-        try:
-            with open(output_path, "wb") as file:
-                file.write(buffer)
-        except IOError as e:
-            logger.error(f"Failed to write to file {output_path}: {str(e)}")
-            raise e
-
-        logger.info(f"Saved file to {output_path}")
+        filename = f"{uuid.uuid4()}.{ext}"  # generate a unique filename
+        output_path = os.path.join(self.output_dir, filename)
 
-        return {
-            "outputPath": output_path,
-            "filename": filename,
-        }
+        file_metadata = save_file(buffer, file_path=output_path)
 
-    def get_file_url(self, filename: str) -> str:
-        return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}"
+        return file_metadata
 
-    def parse_result(self, result) -> List[InterpreterExtraResult]:
+    def _parse_result(self, result) -> List[InterpreterExtraResult]:
         """
         The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
         We save each result to disk and return saved file metadata (extension, filename, url)
@@ -92,16 +108,20 @@ def parse_result(self, result) -> List[InterpreterExtraResult]:
             for ext, data in zip(formats, results):
                 match ext:
                     case "png" | "svg" | "jpeg" | "pdf":
-                        result = self.save_to_disk(data, ext)
-                        filename = result["filename"]
+                        file_metadata = self._save_to_disk(data, ext)
                         output.append(
                             InterpreterExtraResult(
                                 type=ext,
-                                filename=filename,
-                                url=self.get_file_url(filename),
+                                filename=file_metadata.name,
+                                url=file_metadata.url,
                             )
                         )
                     case _:
+                        # Try serialize data to string
+                        try:
+                            data = str(data)
+                        except Exception as e:
+                            data = f"Error when serializing data: {e}"
                         output.append(
                             InterpreterExtraResult(
                                 type=ext,
@@ -114,27 +134,71 @@ def parse_result(self, result) -> List[InterpreterExtraResult]:
 
         return output
 
-    def interpret(self, code: str) -> E2BToolOutput:
+    def interpret(
+        self,
+        code: str,
+        sandbox_files: List[str] = [],
+        retry_count: int = 0,
+    ) -> E2BToolOutput:
         """
         Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error.
+        If the code need to use a file, ALWAYS pass the file path in the sandbox_files argument.
+        You have a maximum of 3 retries to get the code to run successfully.
 
         Parameters:
             code (str): The python code to be executed in a single cell.
+            sandbox_files (List[str]): List of local file paths be used the the code, the tool will throw error if a file is not found.
+            retry_count (int): Number of times the tool has been retried.
         """
+        if retry_count > 2:
+            return E2BToolOutput(
+                is_error=True,
+                logs=Logs(
+                    stdout="",
+                    stderr="",
+                    display_data="",
+                    error="",
+                ),
+                error_message="Tool failed to execute code successfully after 3 retries. Explain the error to the user and suggest a fix.",
+                retry_count=retry_count,
+            )
+
+        if self.interpreter is None:
+            self._init_interpreter(sandbox_files)
+
         logger.info(
             f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
         )
         exec = self.interpreter.notebook.exec_cell(code)
 
         if exec.error:
-            logger.error("Error when executing code", exec.error)
-            output = E2BToolOutput(is_error=True, logs=exec.logs, results=[])
+            error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
+            logger.error(error_message)
+            # There would be an error from previous execution, kill the interpreter and return with error message
+            try:
+                self.interpreter.kill()
+            except Exception:
+                pass
+            finally:
+                self.interpreter = None
+            output = E2BToolOutput(
+                is_error=True,
+                logs=exec.logs,
+                results=[],
+                error_message=error_message,
+                retry_count=retry_count + 1,
+            )
         else:
             if len(exec.results) == 0:
                 output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
             else:
-                results = self.parse_result(exec.results[0])
-                output = E2BToolOutput(is_error=False, logs=exec.logs, results=results)
+                results = self._parse_result(exec.results[0])
+                output = E2BToolOutput(
+                    is_error=False,
+                    logs=exec.logs,
+                    results=results,
+                    retry_count=retry_count + 1,
+                )
         return output
 
 

From 6efadd4a40f73f5aa8cdcba125a4aeb566bff599 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 11:05:29 +0700
Subject: [PATCH 09/32] fix blocking stream event

---
 .../streaming/fastapi/app/api/routers/chat.py |  23 +---
 .../fastapi/app/api/routers/models.py         |   1 -
 .../app/api/routers/vercel_response.py        | 100 ++++++++++++------
 3 files changed, 70 insertions(+), 54 deletions(-)

diff --git a/templates/types/streaming/fastapi/app/api/routers/chat.py b/templates/types/streaming/fastapi/app/api/routers/chat.py
index 7e96c9274..c024dad02 100644
--- a/templates/types/streaming/fastapi/app/api/routers/chat.py
+++ b/templates/types/streaming/fastapi/app/api/routers/chat.py
@@ -1,8 +1,6 @@
 import logging
-from typing import List
 
 from fastapi import APIRouter, BackgroundTasks, HTTPException, Request, status
-from llama_index.core.chat_engine.types import NodeWithScore
 from llama_index.core.llms import MessageRole
 
 from app.api.routers.events import EventCallbackHandler
@@ -42,10 +40,11 @@ async def chat(
         chat_engine = get_chat_engine(
             filters=filters, params=params, event_handlers=[event_handler]
         )
-        response = await chat_engine.astream_chat(last_message_content, messages)
-        process_response_nodes(response.source_nodes, background_tasks)
+        response = chat_engine.astream_chat(last_message_content, messages)
 
-        return VercelStreamResponse(request, event_handler, response, data)
+        return VercelStreamResponse(
+            request, event_handler, response, data, background_tasks
+        )
     except Exception as e:
         logger.exception("Error in chat engine", exc_info=True)
         raise HTTPException(
@@ -76,17 +75,3 @@ async def chat_request(
         result=Message(role=MessageRole.ASSISTANT, content=response.response),
         nodes=SourceNodes.from_source_nodes(response.source_nodes),
     )
-
-
-def process_response_nodes(
-    nodes: List[NodeWithScore],
-    background_tasks: BackgroundTasks,
-):
-    try:
-        # Start background tasks to download documents from LlamaCloud if needed
-        from app.engine.service import LLamaCloudFileService
-
-        LLamaCloudFileService.download_files_from_nodes(nodes, background_tasks)
-    except ImportError:
-        logger.debug("LlamaCloud is not configured. Skipping post processing of nodes")
-        pass
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 538485cf9..b56b8f090 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -199,7 +199,6 @@ def _get_latest_code_artifact(self) -> Optional[str]:
                     ):
                         tool_output = annotation.data.toolOutput
                         if tool_output and not tool_output.get("isError", False):
-                            print("tool_output", tool_output)
                             output = tool_output.get("output", {})
                             if isinstance(output, dict) and output.get("code"):
                                 return output.get("code")
diff --git a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
index 924c60ce5..b1bb75178 100644
--- a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
+++ b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
@@ -1,15 +1,19 @@
 import json
+import logging
 from typing import List
 
 from aiostream import stream
-from fastapi import Request
+from fastapi import BackgroundTasks, Request
 from fastapi.responses import StreamingResponse
 from llama_index.core.chat_engine.types import StreamingAgentChatResponse
+from llama_index.core.schema import RelatedNodeInfo
 
 from app.api.routers.events import EventCallbackHandler
 from app.api.routers.models import ChatData, Message, SourceNodes
 from app.api.services.suggestion import NextQuestionSuggestion
 
+logger = logging.getLogger("uvicorn")
+
 
 class VercelStreamResponse(StreamingResponse):
     """
@@ -19,26 +23,16 @@ class VercelStreamResponse(StreamingResponse):
     TEXT_PREFIX = "0:"
     DATA_PREFIX = "8:"
 
-    @classmethod
-    def convert_text(cls, token: str):
-        # Escape newlines and double quotes to avoid breaking the stream
-        token = json.dumps(token)
-        return f"{cls.TEXT_PREFIX}{token}\n"
-
-    @classmethod
-    def convert_data(cls, data: dict):
-        data_str = json.dumps(data)
-        return f"{cls.DATA_PREFIX}[{data_str}]\n"
-
     def __init__(
         self,
         request: Request,
         event_handler: EventCallbackHandler,
         response: StreamingAgentChatResponse,
         chat_data: ChatData,
+        background_tasks: BackgroundTasks,
     ):
         content = VercelStreamResponse.content_generator(
-            request, event_handler, response, chat_data
+            request, event_handler, response, chat_data, background_tasks
         )
         super().__init__(content=content)
 
@@ -49,23 +43,22 @@ async def content_generator(
         event_handler: EventCallbackHandler,
         response: StreamingAgentChatResponse,
         chat_data: ChatData,
+        background_tasks: BackgroundTasks,
     ):
+        # Yield the events from the event handler
+        async def _event_generator():
+            async for event in event_handler.async_event_gen():
+                event_response = event.to_response()
+                if event_response is not None:
+                    yield cls.convert_data(event_response)
+
         # Yield the text response
         async def _chat_response_generator():
-            final_response = ""
-            async for token in response.async_response_gen():
-                final_response += token
-                yield cls.convert_text(token)
+            # Wait for the response from the chat engine
+            result = await response
 
-            # Generate next questions if next question prompt is configured
-            question_data = await cls._generate_next_questions(
-                chat_data.messages, final_response
-            )
-            if question_data:
-                yield cls.convert_data(question_data)
-
-            # the text_generator is the leading stream, once it's finished, also finish the event stream
-            event_handler.is_done = True
+            # Once we got a source node, start a background task to download the files (if needed)
+            cls._download_llamacloud_files(result.source_nodes, background_tasks)
 
             # Yield the source nodes
             yield cls.convert_data(
@@ -74,26 +67,35 @@ async def _chat_response_generator():
                     "data": {
                         "nodes": [
                             SourceNodes.from_source_node(node).model_dump()
-                            for node in response.source_nodes
+                            for node in result.source_nodes
                         ]
                     },
                 }
             )
 
-        # Yield the events from the event handler
-        async def _event_generator():
-            async for event in event_handler.async_event_gen():
-                event_response = event.to_response()
-                if event_response is not None:
-                    yield cls.convert_data(event_response)
+            final_response = ""
+            async for token in result.async_response_gen():
+                final_response += token
+                yield cls.convert_text(token)
 
+            # Generate next questions if next question prompt is configured
+            question_data = await cls._generate_next_questions(
+                chat_data.messages, final_response
+            )
+            if question_data:
+                yield cls.convert_data(question_data)
+
+            # the text_generator is the leading stream, once it's finished, also finish the event stream
+            event_handler.is_done = True
+
+        # Merge the chat response generator and the event generator
         combine = stream.merge(_chat_response_generator(), _event_generator())
         is_stream_started = False
         async with combine.stream() as streamer:
             async for output in streamer:
                 if not is_stream_started:
                     is_stream_started = True
-                    # Stream a blank message to start the stream
+                    # Stream a blank message to start displaying the response in the UI
                     yield cls.convert_text("")
 
                 yield output
@@ -112,3 +114,33 @@ async def _generate_next_questions(chat_history: List[Message], response: str):
                 "data": questions,
             }
         return None
+
+    @classmethod
+    def convert_text(cls, token: str):
+        # Escape newlines and double quotes to avoid breaking the stream
+        token = json.dumps(token)
+        return f"{cls.TEXT_PREFIX}{token}\n"
+
+    @classmethod
+    def convert_data(cls, data: dict):
+        data_str = json.dumps(data)
+        return f"{cls.DATA_PREFIX}[{data_str}]\n"
+
+    @classmethod
+    def _download_llamacloud_files(
+        cls,
+        source_nodes: List[RelatedNodeInfo],
+        background_tasks: BackgroundTasks,
+    ):
+        try:
+            # Start background tasks to download documents from LlamaCloud if needed
+            from app.engine.service import LLamaCloudFileService
+
+            LLamaCloudFileService.download_files_from_nodes(
+                source_nodes, background_tasks
+            )
+        except ImportError:
+            logger.debug(
+                "LlamaCloud is not configured. Skipping post processing of nodes"
+            )
+            pass

From 3e82be7a86958768314151fe19fda166b86c3a5d Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 11:27:40 +0700
Subject: [PATCH 10/32] fix mypy

---
 .../fastapi/app/api/routers/vercel_response.py     | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
index b1bb75178..8999f36f8 100644
--- a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
+++ b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
@@ -1,12 +1,12 @@
 import json
 import logging
-from typing import List
+from typing import Awaitable, List
 
 from aiostream import stream
 from fastapi import BackgroundTasks, Request
 from fastapi.responses import StreamingResponse
 from llama_index.core.chat_engine.types import StreamingAgentChatResponse
-from llama_index.core.schema import RelatedNodeInfo
+from llama_index.core.schema import NodeWithScore
 
 from app.api.routers.events import EventCallbackHandler
 from app.api.routers.models import ChatData, Message, SourceNodes
@@ -27,7 +27,7 @@ def __init__(
         self,
         request: Request,
         event_handler: EventCallbackHandler,
-        response: StreamingAgentChatResponse,
+        response: Awaitable[StreamingAgentChatResponse],
         chat_data: ChatData,
         background_tasks: BackgroundTasks,
     ):
@@ -41,7 +41,7 @@ async def content_generator(
         cls,
         request: Request,
         event_handler: EventCallbackHandler,
-        response: StreamingAgentChatResponse,
+        response: Awaitable[StreamingAgentChatResponse],
         chat_data: ChatData,
         background_tasks: BackgroundTasks,
     ):
@@ -58,7 +58,7 @@ async def _chat_response_generator():
             result = await response
 
             # Once we got a source node, start a background task to download the files (if needed)
-            cls._download_llamacloud_files(result.source_nodes, background_tasks)
+            cls.process_response_nodes(result.source_nodes, background_tasks)
 
             # Yield the source nodes
             yield cls.convert_data(
@@ -127,9 +127,9 @@ def convert_data(cls, data: dict):
         return f"{cls.DATA_PREFIX}[{data_str}]\n"
 
     @classmethod
-    def _download_llamacloud_files(
+    def process_response_nodes(
         cls,
-        source_nodes: List[RelatedNodeInfo],
+        source_nodes: List[NodeWithScore],
         background_tasks: BackgroundTasks,
     ):
         try:

From 9602c6ca889781a7d8d1a6bcc3faedbae6904de5 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 11:43:00 +0700
Subject: [PATCH 11/32] add changeset and fix mypy after merge

---
 .changeset/poor-knives-smoke.md               |  5 ++
 .changeset/wet-tips-judge.md                  |  5 ++
 .../engines/python/agent/tools/interpreter.py | 68 ++++++++++---------
 3 files changed, 45 insertions(+), 33 deletions(-)
 create mode 100644 .changeset/poor-knives-smoke.md
 create mode 100644 .changeset/wet-tips-judge.md

diff --git a/.changeset/poor-knives-smoke.md b/.changeset/poor-knives-smoke.md
new file mode 100644
index 000000000..a09c627c2
--- /dev/null
+++ b/.changeset/poor-knives-smoke.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Fix event streaming is blocked
diff --git a/.changeset/wet-tips-judge.md b/.changeset/wet-tips-judge.md
new file mode 100644
index 000000000..baa80fb8c
--- /dev/null
+++ b/.changeset/wet-tips-judge.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add upload file to sandbox
diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index 6f264efc1..150f59b94 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -2,9 +2,9 @@
 import logging
 import os
 import uuid
-from typing import Dict, List, Optional
+from typing import List, Optional
 
-from app.engine.utils.file_helper import save_file
+from app.engine.utils.file_helper import FileMetadata, save_file
 from e2b_code_interpreter import CodeInterpreter
 from e2b_code_interpreter.models import Logs
 from llama_index.core.tools import FunctionTool
@@ -78,10 +78,11 @@ def _init_interpreter(self, sandbox_files: List[str] = []):
 
                 with open(local_file_path, "rb") as f:
                     content = f.read()
-                    self.interpreter.files.write(sandbox_file_path, content)
+                    if self.interpreter and self.interpreter.files:
+                        self.interpreter.files.write(sandbox_file_path, content)
             logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")
 
-    def _save_to_disk(self, base64_data: str, ext: str) -> Dict:
+    def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:
         buffer = base64.b64decode(base64_data)
 
         filename = f"{uuid.uuid4()}.{ext}"  # generate a unique filename
@@ -166,40 +167,41 @@ def interpret(
         if self.interpreter is None:
             self._init_interpreter(sandbox_files)
 
-        logger.info(
-            f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
-        )
-        exec = self.interpreter.notebook.exec_cell(code)
-
-        if exec.error:
-            error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
-            logger.error(error_message)
-            # There would be an error from previous execution, kill the interpreter and return with error message
-            try:
-                self.interpreter.kill()
-            except Exception:
-                pass
-            finally:
-                self.interpreter = None
-            output = E2BToolOutput(
-                is_error=True,
-                logs=exec.logs,
-                results=[],
-                error_message=error_message,
-                retry_count=retry_count + 1,
+        if self.interpreter and self.interpreter.notebook:
+            logger.info(
+                f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
             )
-        else:
-            if len(exec.results) == 0:
-                output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
-            else:
-                results = self._parse_result(exec.results[0])
+            exec = self.interpreter.notebook.exec_cell(code)
+
+            if exec.error:
+                error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
+                logger.error(error_message)
+                # There would be an error from previous execution, kill the interpreter and return with error message
+                try:
+                    self.interpreter.kill()  # type: ignore
+                except Exception:
+                    pass
+                finally:
+                    self.interpreter = None
                 output = E2BToolOutput(
-                    is_error=False,
+                    is_error=True,
                     logs=exec.logs,
-                    results=results,
+                    results=[],
+                    error_message=error_message,
                     retry_count=retry_count + 1,
                 )
-        return output
+            else:
+                if len(exec.results) == 0:
+                    output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
+                else:
+                    results = self._parse_result(exec.results[0])
+                    output = E2BToolOutput(
+                        is_error=False,
+                        logs=exec.logs,
+                        results=results,
+                        retry_count=retry_count + 1,
+                    )
+            return output
 
 
 def get_tools(**kwargs):

From 985cb2659775212686a4ead1c3c775a5c89d6937 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 12:12:54 +0700
Subject: [PATCH 12/32] fix mypy

---
 templates/components/engines/python/agent/tools/interpreter.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index 150f59b94..ef7e702e1 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -202,6 +202,8 @@ def interpret(
                         retry_count=retry_count + 1,
                     )
             return output
+        else:
+            raise ValueError("Interpreter is not initialized.")
 
 
 def get_tools(**kwargs):

From 9a4c0a3d6d040295c278a9d83b561a8ed3e863f5 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 13:50:56 +0700
Subject: [PATCH 13/32] enhance code

---
 .../components/routers/python/sandbox.py      |  9 +++-
 templates/components/services/python/file.py  | 20 ++++-----
 .../fastapi/app/api/routers/models.py         | 41 ++++++++++++-------
 .../fastapi/app/engine/utils/file_helper.py   |  5 +--
 4 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/templates/components/routers/python/sandbox.py b/templates/components/routers/python/sandbox.py
index 233602fd4..57ef31167 100644
--- a/templates/components/routers/python/sandbox.py
+++ b/templates/components/routers/python/sandbox.py
@@ -63,14 +63,19 @@ class FileUpload(BaseModel):
 @sandbox_router.post("")
 async def create_sandbox(request: Request):
     request_data = await request.json()
-    artifact_data = request_data["artifact"]
+    artifact_data = request_data.get("artifact", None)
     sandbox_files = artifact_data.get("files", [])
 
+    if not artifact_data:
+        raise HTTPException(
+            status_code=400, detail="Could not create artifact from the request data"
+        )
+
     try:
         artifact = CodeArtifact(**artifact_data)
     except Exception:
         logger.error(f"Could not create artifact from request data: {request_data}")
-        return HTTPException(
+        raise HTTPException(
             status_code=400, detail="Could not create artifact from the request data"
         )
 
diff --git a/templates/components/services/python/file.py b/templates/components/services/python/file.py
index 0382ba0fa..d33780f1c 100644
--- a/templates/components/services/python/file.py
+++ b/templates/components/services/python/file.py
@@ -69,7 +69,8 @@ def _load_file_to_documents(file_metadata: FileMetadata) -> List[Document]:
         """
         Load the file from the private directory and return the documents
         """
-        extension = file_metadata.name.split(".")[-1]
+        _, extension = os.path.splitext(file_metadata.name)
+        extension = extension.lstrip(".")
 
         # Load file to documents
         # If LlamaParse is enabled, use it to parse the file
@@ -125,14 +126,13 @@ def _add_file_to_llama_cloud_index(
         pipeline_id = index._get_pipeline_id()
         # LlamaCloudIndex is a managed index so we can directly use the files
         upload_file = (file_name, BytesIO(file_data))
-        return [
-            LLamaCloudFileService.add_file_to_pipeline(
-                project_id,
-                pipeline_id,
-                upload_file,
-                custom_metadata={},
-            )
-        ]
+        _ = LLamaCloudFileService.add_file_to_pipeline(
+            project_id,
+            pipeline_id,
+            upload_file,
+            custom_metadata={},
+        )
+        return None
 
     @staticmethod
     def _sanitize_file_name(file_name: str) -> str:
@@ -168,7 +168,7 @@ def process_file(
             documents = cls._load_file_to_documents(file_metadata)
             cls._add_documents_to_vector_store_index(documents, index)
             # Add document ids to the file metadata
-            file_metadata.document_ids = [doc.doc_id for doc in documents]
+            file_metadata.refs = [doc.doc_id for doc in documents]
 
         # Return the file metadata
         return file_metadata
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index b56b8f090..52e44de9b 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -25,22 +25,33 @@ class FileMetadata(BaseModel):
     url: Optional[str] = None
     refs: Optional[List[str]] = None
 
+    def _get_url_llm_content(self) -> Optional[str]:
+        url_prefix = os.getenv("FILESERVER_URL_PREFIX")
+        if url_prefix:
+            if self.url is not None:
+                return f"File URL: {self.url}\n"
+            else:
+                # Construct url from file name
+                return f"File URL (instruction: do not update this file URL yourself): {url_prefix}/output/uploaded/{self.name}\n"
+        else:
+            logger.warning(
+                "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server"
+            )
+            return None
+
     def to_llm_content(self) -> str:
         """
         Construct content for LLM from the file metadata
         """
         default_content = f"=====File: {self.name}=====\n"
-        if self.url is not None:
-            default_content += f"File URL: {self.url}\n"
-        else:
-            # Construct url from file name
-            url = f"https://{os.getenv('FILESERVER_URL_PREFIX')}/output/uploaded/{self.name}"
-            default_content += (
-                f"File URL (instruction: do not update this file URL yourself): {url}\n"
-            )
+        # Include file URL if it's available
+        url_content = self._get_url_llm_content()
+        if url_content:
+            default_content += url_content
+        # Include document IDs if it's available
         if self.refs is not None:
             default_content += f"Document IDs: {self.refs}\n"
-        # construct additional metadata for code interpreter
+        # Include sandbox file path
         sandbox_file_path = f"/tmp/{self.name}"
         default_content += f"Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): {sandbox_file_path}\n"
         return default_content
@@ -94,7 +105,7 @@ class Annotation(BaseModel):
     type: str
     data: AnnotationFileData | List[str] | AgentAnnotation | ArtifactAnnotation
 
-    def to_content(self) -> str | None:
+    def to_content(self) -> Optional[str]:
         if self.type == "document_file" and isinstance(self.data, AnnotationFileData):
             # iterate through all files and construct content for LLM
             file_contents = [file.metadata.to_llm_content() for file in self.data.files]
@@ -246,23 +257,23 @@ def get_chat_document_ids(self) -> List[str]:
         document_ids: List[str] = []
         uploaded_files = self.get_uploaded_files()
         for _file in uploaded_files:
-            file_metadata = _file.get("metadata", {})
-            refs = file_metadata.get("refs", [])
+            refs = _file.metadata.refs
             document_ids.extend(refs)
         return list(set(document_ids))
 
-    def get_uploaded_files(self) -> List[Dict[str, Any]]:
+    def get_uploaded_files(self) -> List[File]:
         """
         Get the uploaded files from the chat data
         """
+        uploaded_files = []
         for message in self.messages:
             if message.role == MessageRole.USER and message.annotations is not None:
                 for annotation in message.annotations:
                     if annotation.type == "document_file" and isinstance(
                         annotation.data, AnnotationFileData
                     ):
-                        return [file.model_dump() for file in annotation.data.files]
-        return []
+                        uploaded_files.extend(annotation.data.files)
+        return uploaded_files
 
 
 class SourceNodes(BaseModel):
diff --git a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
index df5ccf313..5270139ea 100644
--- a/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
+++ b/templates/types/streaming/fastapi/app/engine/utils/file_helper.py
@@ -12,7 +12,7 @@ class FileMetadata(BaseModel):
     path: str = Field(..., description="The stored path of the file")
     name: str = Field(..., description="The name of the file")
     url: str = Field(..., description="The URL of the file")
-    document_ids: Optional[List[str]] = Field(
+    refs: Optional[List[str]] = Field(
         None, description="The indexed document IDs that the file is referenced to"
     )
 
@@ -28,9 +28,8 @@ def to_upload_response(self) -> Dict[str, Any]:
             "id": self.file_id,
             "name": self.name,
             "url": self.url,
+            "refs": self.refs,
         }
-        if self.document_ids is not None:
-            response["refs"] = self.document_ids
         return response
 
 

From 2efc7277fc7917e6a1f5db88cc754089855c67d0 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Mon, 14 Oct 2024 14:14:42 +0700
Subject: [PATCH 14/32] typing

---
 .../engines/python/agent/tools/interpreter.py          | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index ef7e702e1..72639290f 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -142,13 +142,13 @@ def interpret(
         retry_count: int = 0,
     ) -> E2BToolOutput:
         """
-        Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error.
-        If the code need to use a file, ALWAYS pass the file path in the sandbox_files argument.
+        Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
+        If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
         You have a maximum of 3 retries to get the code to run successfully.
 
         Parameters:
-            code (str): The python code to be executed in a single cell.
-            sandbox_files (List[str]): List of local file paths be used the the code, the tool will throw error if a file is not found.
+            code (str): The Python code to be executed in a single cell.
+            sandbox_files (List[str]): List of local file paths to be used by the code. The tool will throw an error if a file is not found.
             retry_count (int): Number of times the tool has been retried.
         """
         if retry_count > 2:
@@ -160,7 +160,7 @@ def interpret(
                     display_data="",
                     error="",
                 ),
-                error_message="Tool failed to execute code successfully after 3 retries. Explain the error to the user and suggest a fix.",
+                error_message="Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
                 retry_count=retry_count,
             )
 

From 249edf5c6942a94f04794d4a0d66182a541ec4ee Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 08:25:26 +0700
Subject: [PATCH 15/32] wording

---
 .changeset/wet-tips-judge.md                                   | 2 +-
 templates/components/engines/python/agent/tools/interpreter.py | 2 +-
 templates/types/streaming/fastapi/app/api/routers/models.py    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.changeset/wet-tips-judge.md b/.changeset/wet-tips-judge.md
index baa80fb8c..478106884 100644
--- a/.changeset/wet-tips-judge.md
+++ b/.changeset/wet-tips-judge.md
@@ -2,4 +2,4 @@
 "create-llama": patch
 ---
 
-Add upload file to sandbox
+Add upload file to sandbox (artifact and code interpreter)
diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index 72639290f..cdce34ed9 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -176,7 +176,7 @@ def interpret(
             if exec.error:
                 error_message = f"The code failed to execute successfully. Error: {exec.error}. Try to fix the code and run again."
                 logger.error(error_message)
-                # There would be an error from previous execution, kill the interpreter and return with error message
+                # Calling the generated code caused an error. Kill the interpreter and return the error to the LLM so it can try to fix the error
                 try:
                     self.interpreter.kill()  # type: ignore
                 except Exception:
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 52e44de9b..8f1dcedfd 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -297,7 +297,7 @@ def from_source_node(cls, source_node: NodeWithScore):
         )
 
     @classmethod
-    def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> str:
+    def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> Optional[str]:
         url_prefix = os.getenv("FILESERVER_URL_PREFIX")
         if not url_prefix:
             logger.warning(

From 22cd95857f49da0c82b4d0481ffbba76d2b98ab5 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 08:57:53 +0700
Subject: [PATCH 16/32] exclude indexing private csv file if code executor tool
 is enabled

---
 templates/components/services/python/file.py  | 46 ++++++++++++++-----
 .../fastapi/app/api/routers/models.py         |  3 +-
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/templates/components/services/python/file.py b/templates/components/services/python/file.py
index d33780f1c..3e9ad3e64 100644
--- a/templates/components/services/python/file.py
+++ b/templates/components/services/python/file.py
@@ -5,7 +5,7 @@
 import uuid
 from io import BytesIO
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 from app.engine.index import IndexConfig, get_index
 from app.engine.utils.file_helper import FileMetadata, save_file
@@ -15,6 +15,7 @@
     _try_loading_included_file_formats as get_file_loaders_map,
 )
 from llama_index.core.schema import Document
+from llama_index.core.tools.function_tool import FunctionTool
 from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
 from llama_index.readers.file import FlatReader
 
@@ -112,7 +113,7 @@ def _add_file_to_llama_cloud_index(
         index: LlamaCloudIndex,
         file_name: str,
         file_data: bytes,
-    ) -> None:
+    ) -> str:
         """
         Add the file to the LlamaCloud index.
         LlamaCloudIndex is a managed index so we can directly use the files.
@@ -126,13 +127,13 @@ def _add_file_to_llama_cloud_index(
         pipeline_id = index._get_pipeline_id()
         # LlamaCloudIndex is a managed index so we can directly use the files
         upload_file = (file_name, BytesIO(file_data))
-        _ = LLamaCloudFileService.add_file_to_pipeline(
+        doc_id = LLamaCloudFileService.add_file_to_pipeline(
             project_id,
             pipeline_id,
             upload_file,
             custom_metadata={},
         )
-        return None
+        return doc_id
 
     @staticmethod
     def _sanitize_file_name(file_name: str) -> str:
@@ -161,14 +162,37 @@ def process_file(
         file_data, extension = cls._preprocess_base64_file(base64_content)
         file_metadata = cls._store_file(new_file_name, file_data)
 
-        # Insert the file into the index
-        if isinstance(index, LlamaCloudIndex):
-            _ = cls._add_file_to_llama_cloud_index(index, new_file_name, file_data)
+        tools = cls._get_available_tools()
+        code_executor_tools = ["interpreter", "artifact"]
+        # If the file is CSV and there is a code executor tool, we don't need to index.
+        if extension == ".csv" and any(tool in tools for tool in code_executor_tools):
+            return file_metadata
         else:
-            documents = cls._load_file_to_documents(file_metadata)
-            cls._add_documents_to_vector_store_index(documents, index)
-            # Add document ids to the file metadata
-            file_metadata.refs = [doc.doc_id for doc in documents]
+            # Insert the file into the index and update document ids to the file metadata
+            if isinstance(index, LlamaCloudIndex):
+                doc_id = cls._add_file_to_llama_cloud_index(
+                    index, new_file_name, file_data
+                )
+                # Add document ids to the file metadata
+                file_metadata.refs = [doc_id]
+            else:
+                documents = cls._load_file_to_documents(file_metadata)
+                cls._add_documents_to_vector_store_index(documents, index)
+                # Add document ids to the file metadata
+                file_metadata.refs = [doc.doc_id for doc in documents]
 
         # Return the file metadata
         return file_metadata
+
+    @staticmethod
+    def _get_available_tools() -> Dict[str, List[FunctionTool]]:
+        try:
+            from app.engine.tools import ToolFactory
+
+            tools = ToolFactory.from_env(map_result=True)
+            return tools
+        except ImportError:
+            # There is no tool code
+            return {}
+        except Exception as e:
+            raise ValueError(f"Failed to get available tools: {e}") from e
diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 8f1dcedfd..7616c5ef8 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -258,7 +258,8 @@ def get_chat_document_ids(self) -> List[str]:
         uploaded_files = self.get_uploaded_files()
         for _file in uploaded_files:
             refs = _file.metadata.refs
-            document_ids.extend(refs)
+            if refs is not None:
+                document_ids.extend(refs)
         return list(set(document_ids))
 
     def get_uploaded_files(self) -> List[File]:

From 30e408b7dbf50817f78623be31b33fef45a48654 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 09:49:37 +0700
Subject: [PATCH 17/32] remove file content and duplicated file id

---
 .../streaming/fastapi/app/api/routers/models.py      | 10 +---------
 .../nextjs/app/components/ui/chat/chat-input.tsx     |  2 +-
 .../components/ui/chat/chat-message/chat-files.tsx   |  2 +-
 .../nextjs/app/components/ui/chat/hooks/use-file.ts  | 11 ++++-------
 .../streaming/nextjs/app/components/ui/chat/index.ts |  7 -------
 .../nextjs/app/components/ui/document-preview.tsx    | 12 ++++++------
 6 files changed, 13 insertions(+), 31 deletions(-)

diff --git a/templates/types/streaming/fastapi/app/api/routers/models.py b/templates/types/streaming/fastapi/app/api/routers/models.py
index 7616c5ef8..3bbe7b6e7 100644
--- a/templates/types/streaming/fastapi/app/api/routers/models.py
+++ b/templates/types/streaming/fastapi/app/api/routers/models.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Dict, List, Optional
 
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.schema import NodeWithScore
@@ -12,13 +12,6 @@
 logger = logging.getLogger("uvicorn")
 
 
-class FileContent(BaseModel):
-    type: Literal["text", "ref"]
-    # If the file is pure text then the value is be a string
-    # otherwise, it's a list of document IDs
-    value: str | List[str]
-
-
 class FileMetadata(BaseModel):
     id: str
     name: str
@@ -58,7 +51,6 @@ def to_llm_content(self) -> str:
 
 
 class File(BaseModel):
-    id: str
     filetype: str
     metadata: FileMetadata
 
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
index 326cc9695..417809c13 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
@@ -97,7 +97,7 @@ export default function ChatInput(
         <div className="flex gap-4 w-full overflow-auto py-2">
           {files.map((file) => (
             <DocumentPreview
-              key={file.id}
+              key={file.metadata?.id}
               file={file}
               onRemove={() => removeDoc(file)}
             />
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
index 5139c5411..42c67598b 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
@@ -6,7 +6,7 @@ export function ChatFiles({ data }: { data: DocumentFileData }) {
   return (
     <div className="flex gap-2 items-center">
       {data.files.map((file) => (
-        <DocumentPreview key={file.id} file={file} />
+        <DocumentPreview key={file.metadata?.id} file={file} />
       ))}
     </div>
   );
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts b/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
index 2aa9776e2..695202f4f 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/hooks/use-file.ts
@@ -25,7 +25,7 @@ export function useFile() {
   const [files, setFiles] = useState<DocumentFile[]>([]);
 
   const docEqual = (a: DocumentFile, b: DocumentFile) => {
-    if (a.id === b.id) return true;
+    if (a.metadata?.id === b.metadata?.id) return true;
     if (a.filename === b.filename && a.filesize === b.filesize) return true;
     return false;
   };
@@ -40,7 +40,9 @@ export function useFile() {
   };
 
   const removeDoc = (file: DocumentFile) => {
-    setFiles((prev) => prev.filter((f) => f.id !== file.id));
+    setFiles((prev) =>
+      prev.filter((f) => f.metadata?.id !== file.metadata?.id),
+    );
   };
 
   const reset = () => {
@@ -114,14 +116,9 @@ export function useFile() {
     if (!filetype) throw new Error("Unsupported document type.");
     const uploadedFileMeta = await uploadContent(file, requestParams);
     const newDoc: DocumentFile = {
-      id: uploadedFileMeta.id,
       filename: file.name,
       filesize: file.size,
       filetype,
-      content: {
-        type: "ref",
-        value: uploadedFileMeta.refs || [],
-      },
       metadata: uploadedFileMeta,
     };
     return addDoc(newDoc);
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
index 8ce8198d6..e8b55ac25 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
@@ -21,11 +21,6 @@ export type ImageData = {
 
 export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";
 
-export type DocumentFileContent = {
-  type: "ref" | "text";
-  value: string[] | string;
-};
-
 export type UploadedFileMeta = {
   id: string;
   name: string;
@@ -34,11 +29,9 @@ export type UploadedFileMeta = {
 };
 
 export type DocumentFile = {
-  id?: string;
   filename: string;
   filesize: number;
   filetype: DocumentFileType;
-  content: DocumentFileContent;
   metadata?: UploadedFileMeta;
 };
 
diff --git a/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx b/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
index c2d523221..170c67cf3 100644
--- a/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
@@ -23,11 +23,11 @@ export interface DocumentPreviewProps {
 }
 
 export function DocumentPreview(props: DocumentPreviewProps) {
-  const { filename, filesize, content, filetype } = props.file;
+  const { filename, filesize, filetype, metadata } = props.file;
 
-  if (content.type === "ref") {
+  if (metadata?.refs?.length) {
     return (
-      <div title={`Document IDs: ${(content.value as string[]).join(", ")}`}>
+      <div title={`Document IDs: ${metadata.refs.join(", ")}`}>
         <PreviewCard {...props} />
       </div>
     );
@@ -53,9 +53,9 @@ export function DocumentPreview(props: DocumentPreviewProps) {
           </DrawerClose>
         </DrawerHeader>
         <div className="m-4 max-h-[80%] overflow-auto">
-          {content.type === "text" && (
+          {metadata?.refs?.length && (
             <pre className="bg-secondary rounded-md p-4 block text-sm">
-              {content.value as string}
+              {metadata.refs.join(", ")}
             </pre>
           )}
         </div>
@@ -77,7 +77,7 @@ function PreviewCard(props: DocumentPreviewProps) {
     <div
       className={cn(
         "p-2 w-60 max-w-60 bg-secondary rounded-lg text-sm relative",
-        file.content.type === "ref" ? "" : "cursor-pointer",
+        file.metadata?.refs?.length ? "" : "cursor-pointer",
       )}
     >
       <div className="flex flex-row items-center gap-2">

From 94b338a42a834024542da2cc16b497564c480e62 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 10:12:17 +0700
Subject: [PATCH 18/32] simpler file upload

---
 .../engines/python/agent/tools/interpreter.py      | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/interpreter.py b/templates/components/engines/python/agent/tools/interpreter.py
index cdce34ed9..9d19ea883 100644
--- a/templates/components/engines/python/agent/tools/interpreter.py
+++ b/templates/components/engines/python/agent/tools/interpreter.py
@@ -64,22 +64,12 @@ def _init_interpreter(self, sandbox_files: List[str] = []):
         self.interpreter = CodeInterpreter(api_key=self.api_key)
         if len(sandbox_files) > 0:
             for file_path in sandbox_files:
-                # The file path is a local path, but sometimes AI passes a sandbox file path
-                # We need to support both
                 file_name = os.path.basename(file_path)
-                if file_path.startswith("/tmp"):
-                    # The file path is a sandbox file path
-                    sandbox_file_path = file_path
-                    local_file_path = os.path.join(self.uploaded_files_dir, file_name)
-                else:
-                    # The file path is a local file path
-                    local_file_path = file_path
-                    sandbox_file_path = f"tmp/{file_name}"
-
+                local_file_path = os.path.join(self.uploaded_files_dir, file_name)
                 with open(local_file_path, "rb") as f:
                     content = f.read()
                     if self.interpreter and self.interpreter.files:
-                        self.interpreter.files.write(sandbox_file_path, content)
+                        self.interpreter.files.write(file_path, content)
             logger.info(f"Uploaded {len(sandbox_files)} files to sandbox")
 
     def _save_to_disk(self, base64_data: str, ext: str) -> FileMetadata:

From 6bb7a30204b63b1c0806fc311a9e136c01847af5 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 10:42:30 +0700
Subject: [PATCH 19/32] support for TS

---
 .../typescript/agent/tools/interpreter.ts     | 66 ++++++++++++--
 .../llamaindex/typescript/documents/helper.ts | 51 +++++++++--
 .../llamaindex/typescript/documents/upload.ts | 52 ++++++++---
 .../typescript/streaming/annotations.ts       | 89 +++++++++++--------
 4 files changed, 192 insertions(+), 66 deletions(-)

diff --git a/templates/components/engines/typescript/agent/tools/interpreter.ts b/templates/components/engines/typescript/agent/tools/interpreter.ts
index 24573c205..bdb94e2d4 100644
--- a/templates/components/engines/typescript/agent/tools/interpreter.ts
+++ b/templates/components/engines/typescript/agent/tools/interpreter.ts
@@ -7,6 +7,8 @@ import path from "node:path";
 
 export type InterpreterParameter = {
   code: string;
+  sandbox_files: string[];
+  retry_count: number;
 };
 
 export type InterpreterToolParams = {
@@ -18,6 +20,7 @@ export type InterpreterToolParams = {
 export type InterpreterToolOutput = {
   isError: boolean;
   logs: Logs;
+  text?: string;
   extraResult: InterpreterExtraResult[];
 };
 
@@ -41,8 +44,11 @@ export type InterpreterExtraResult = {
 
 const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<InterpreterParameter>> = {
   name: "interpreter",
-  description:
-    "Execute python code in a Jupyter notebook cell and return any result, stdout, stderr, display_data, and error.",
+  description: `Execute python code in a Jupyter notebook cell and return any result, stdout, stderr, display_data, and error.
+Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
+If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
+You have a maximum of 3 retries to get the code to run successfully.
+`,
   parameters: {
     type: "object",
     properties: {
@@ -50,6 +56,19 @@ const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<InterpreterParameter>> = {
         type: "string",
         description: "The python code to execute in a single cell.",
       },
+      sandbox_files: {
+        type: "array",
+        description:
+          "List of local file paths to be used by the code. The tool will throw an error if a file is not found.",
+        items: {
+          type: "string",
+        },
+      },
+      retry_count: {
+        type: "number",
+        description: "The number of times the tool has been retried",
+        default: 0,
+      },
     },
     required: ["code"],
   },
@@ -57,6 +76,7 @@ const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<InterpreterParameter>> = {
 
 export class InterpreterTool implements BaseTool<InterpreterParameter> {
   private readonly outputDir = "output/tools";
+  private readonly uploadedFilesDir = "output/uploaded";
   private apiKey?: string;
   private fileServerURLPrefix?: string;
   metadata: ToolMetadata<JSONSchemaType<InterpreterParameter>>;
@@ -80,33 +100,63 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
     }
   }
 
-  public async initInterpreter() {
+  public async initInterpreter(input: InterpreterParameter) {
     if (!this.codeInterpreter) {
       this.codeInterpreter = await CodeInterpreter.create({
         apiKey: this.apiKey,
       });
     }
+    // upload files to sandbox
+    if (input.sandbox_files) {
+      console.log(`Uploading ${input.sandbox_files.length} files to sandbox`);
+      for (const filePath of input.sandbox_files) {
+        const fileName = path.basename(filePath);
+        const localFilePath = path.join(this.uploadedFilesDir, fileName);
+        const content = fs.readFileSync(localFilePath);
+        await this.codeInterpreter?.files.write(filePath, content);
+      }
+      console.log(`Uploaded ${input.sandbox_files.length} files to sandbox`);
+    }
     return this.codeInterpreter;
   }
 
-  public async codeInterpret(code: string): Promise<InterpreterToolOutput> {
+  public async codeInterpret(
+    input: InterpreterParameter,
+  ): Promise<InterpreterToolOutput> {
+    console.log(
+      `Sandbox files: ${input.sandbox_files}. Retry count: ${input.retry_count}`,
+    );
+
+    if (input.retry_count >= 3) {
+      return {
+        isError: true,
+        logs: {
+          stdout: [],
+          stderr: [],
+        },
+        text: "Max retries reached",
+        extraResult: [],
+      };
+    }
+
     console.log(
-      `\n${"=".repeat(50)}\n> Running following AI-generated code:\n${code}\n${"=".repeat(50)}`,
+      `\n${"=".repeat(50)}\n> Running following AI-generated code:\n${input.code}\n${"=".repeat(50)}`,
     );
-    const interpreter = await this.initInterpreter();
-    const exec = await interpreter.notebook.execCell(code);
+    const interpreter = await this.initInterpreter(input);
+    const exec = await interpreter.notebook.execCell(input.code);
     if (exec.error) console.error("[Code Interpreter error]", exec.error);
     const extraResult = await this.getExtraResult(exec.results[0]);
     const result: InterpreterToolOutput = {
       isError: !!exec.error,
       logs: exec.logs,
+      text: exec.text,
       extraResult,
     };
     return result;
   }
 
   async call(input: InterpreterParameter): Promise<InterpreterToolOutput> {
-    const result = await this.codeInterpret(input.code);
+    const result = await this.codeInterpret(input);
     return result;
   }
 
diff --git a/templates/components/llamaindex/typescript/documents/helper.ts b/templates/components/llamaindex/typescript/documents/helper.ts
index bfe745228..52cc5d943 100644
--- a/templates/components/llamaindex/typescript/documents/helper.ts
+++ b/templates/components/llamaindex/typescript/documents/helper.ts
@@ -1,3 +1,5 @@
+import { Document } from "llamaindex";
+import crypto from "node:crypto";
 import fs from "node:fs";
 import path from "node:path";
 import { getExtractors } from "../../engine/loader";
@@ -5,23 +7,58 @@ import { getExtractors } from "../../engine/loader";
 const MIME_TYPE_TO_EXT: Record<string, string> = {
   "application/pdf": "pdf",
   "text/plain": "txt",
+  "text/csv": "csv",
   "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
     "docx",
 };
 
 const UPLOADED_FOLDER = "output/uploaded";
 
+export type FileMetadata = {
+  id: string;
+  name: string;
+  url: string;
+  refs: string[];
+};
+
 export async function storeAndParseFile(
   filename: string,
   fileBuffer: Buffer,
   mimeType: string,
+): Promise<FileMetadata> {
+  const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
+  const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
+  // Update document IDs in the file metadata
+  fileMetadata.refs = documents.map((document) => document.id_ as string);
+  return fileMetadata;
+}
+
+export async function storeFile(
+  filename: string,
+  fileBuffer: Buffer,
+  mimeType: string,
 ) {
   const fileExt = MIME_TYPE_TO_EXT[mimeType];
   if (!fileExt) throw new Error(`Unsupported document type: ${mimeType}`);
 
+  const fileId = crypto.randomUUID();
+  const newFilename = `${fileId}_${sanitizeFileName(filename)}`;
+  const filepath = path.join(UPLOADED_FOLDER, newFilename);
+  const fileUrl = await saveDocument(filepath, fileBuffer);
+  return {
+    id: fileId,
+    name: newFilename,
+    url: fileUrl,
+    refs: [] as string[],
+  } as FileMetadata;
+}
+
+export async function parseFile(
+  fileBuffer: Buffer,
+  filename: string,
+  mimeType: string,
+) {
   const documents = await loadDocuments(fileBuffer, mimeType);
-  const filepath = path.join(UPLOADED_FOLDER, filename);
-  await saveDocument(filepath, fileBuffer);
   for (const document of documents) {
     document.metadata = {
       ...document.metadata,
@@ -48,12 +85,6 @@ export async function saveDocument(filepath: string, content: string | Buffer) {
   if (path.isAbsolute(filepath)) {
     throw new Error("Absolute file paths are not allowed.");
   }
-  const fileName = path.basename(filepath);
-  if (!/^[a-zA-Z0-9_.-]+$/.test(fileName)) {
-    throw new Error(
-      "File name is not allowed to contain any special characters.",
-    );
-  }
   if (!process.env.FILESERVER_URL_PREFIX) {
     throw new Error("FILESERVER_URL_PREFIX environment variable is not set.");
   }
@@ -71,3 +102,7 @@ export async function saveDocument(filepath: string, content: string | Buffer) {
   console.log(`Saved document to ${filepath}. Reachable at URL: ${fileurl}`);
   return fileurl;
 }
+
+function sanitizeFileName(fileName: string) {
+  return fileName.replace(/[^a-zA-Z0-9_.-]/g, "_");
+}
diff --git a/templates/components/llamaindex/typescript/documents/upload.ts b/templates/components/llamaindex/typescript/documents/upload.ts
index a5a817e77..7cbda1d04 100644
--- a/templates/components/llamaindex/typescript/documents/upload.ts
+++ b/templates/components/llamaindex/typescript/documents/upload.ts
@@ -1,30 +1,41 @@
-import { LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
+import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
 import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
-import { storeAndParseFile } from "./helper";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { FileMetadata, parseFile, storeFile } from "./helper";
 import { runPipeline } from "./pipeline";
 
 export async function uploadDocument(
   index: VectorStoreIndex | LlamaCloudIndex,
   filename: string,
   raw: string,
-): Promise<string[]> {
+): Promise<FileMetadata> {
   const [header, content] = raw.split(",");
   const mimeType = header.replace("data:", "").replace(";base64", "");
   const fileBuffer = Buffer.from(content, "base64");
 
+  // Store file
+  const fileMetadata = await storeFile(filename, fileBuffer, mimeType);
+
+  // If the file is csv and has codeExecutorTool, we don't need to index the file.
+  if (mimeType === "text/csv" && (await hasCodeExecutorTool())) {
+    return fileMetadata;
+  }
+
   if (index instanceof LlamaCloudIndex) {
     // trigger LlamaCloudIndex API to upload the file and run the pipeline
     const projectId = await index.getProjectId();
     const pipelineId = await index.getPipelineId();
     try {
-      return [
-        await LLamaCloudFileService.addFileToPipeline(
-          projectId,
-          pipelineId,
-          new File([fileBuffer], filename, { type: mimeType }),
-          { private: "true" },
-        ),
-      ];
+      const documentId = await LLamaCloudFileService.addFileToPipeline(
+        projectId,
+        pipelineId,
+        new File([fileBuffer], filename, { type: mimeType }),
+        { private: "true" },
+      );
+      // Update file metadata with document IDs
+      fileMetadata.refs = [documentId];
+      return fileMetadata;
     } catch (error) {
       if (
         error instanceof ReferenceError &&
@@ -39,6 +50,21 @@ export async function uploadDocument(
   }
 
   // run the pipeline for other vector store indexes
-  const documents = await storeAndParseFile(filename, fileBuffer, mimeType);
-  return runPipeline(index, documents);
+  const documents: Document[] = await parseFile(fileBuffer, filename, mimeType);
+  // Update file metadata with document IDs
+  fileMetadata.refs = documents.map((document) => document.id_ as string);
+  // Run the pipeline
+  await runPipeline(index, documents);
+  return fileMetadata;
 }
+
+const hasCodeExecutorTool = async () => {
+  const codeExecutorTools = ["interpreter", "artifact"];
+
+  const configFile = path.join("config", "tools.json");
+  const toolConfig = JSON.parse(await fs.readFile(configFile, "utf8"));
+
+  const localTools = toolConfig.local || {};
+  // Check if local tools contains codeExecutorTools
+  return codeExecutorTools.some((tool) => localTools[tool] !== undefined);
+};
diff --git a/templates/components/llamaindex/typescript/streaming/annotations.ts b/templates/components/llamaindex/typescript/streaming/annotations.ts
index 10e6f52c4..aaf373a3c 100644
--- a/templates/components/llamaindex/typescript/streaming/annotations.ts
+++ b/templates/components/llamaindex/typescript/streaming/annotations.ts
@@ -3,17 +3,19 @@ import { MessageContent, MessageContentDetail } from "llamaindex";
 
 export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";
 
-export type DocumentFileContent = {
-  type: "ref" | "text";
-  value: string[] | string;
+export type UploadedFileMeta = {
+  id: string;
+  name: string;
+  url?: string;
+  refs?: string[];
 };
 
 export type DocumentFile = {
   id: string;
-  filename: string;
-  filesize: number;
-  filetype: DocumentFileType;
-  content: DocumentFileContent;
+  name: string;
+  type: DocumentFileType;
+  url: string;
+  metadata: UploadedFileMeta;
 };
 
 type Annotation = {
@@ -29,28 +31,25 @@ export function isValidMessages(messages: Message[]): boolean {
 
 export function retrieveDocumentIds(messages: Message[]): string[] {
   // retrieve document Ids from the annotations of all messages (if any)
+  const documentFiles = retrieveDocumentFiles(messages);
+  return documentFiles.map((file) => file.metadata?.refs || []).flat();
+}
+
+export function retrieveDocumentFiles(messages: Message[]): DocumentFile[] {
   const annotations = getAllAnnotations(messages);
   if (annotations.length === 0) return [];
 
-  const ids: string[] = [];
-
+  const files: DocumentFile[] = [];
   for (const { type, data } of annotations) {
     if (
       type === "document_file" &&
       "files" in data &&
       Array.isArray(data.files)
     ) {
-      const files = data.files as DocumentFile[];
-      for (const file of files) {
-        if (Array.isArray(file.content.value)) {
-          // it's an array, so it's an array of doc IDs
-          ids.push(...file.content.value);
-        }
-      }
+      files.push(...data.files);
     }
   }
-
-  return ids;
+  return files;
 }
 
 export function retrieveMessageContent(messages: Message[]): MessageContent {
@@ -65,6 +64,36 @@ export function retrieveMessageContent(messages: Message[]): MessageContent {
   ];
 }
 
+function getFileContent(file: DocumentFile): string {
+  const fileMetadata = file.metadata;
+  let defaultContent = `=====File: ${fileMetadata.name}=====\n`;
+  // Include file URL if it's available
+  const urlPrefix = process.env.FILESERVER_URL_PREFIX;
+  let urlContent = "";
+  if (urlPrefix) {
+    if (fileMetadata.url) {
+      urlContent = `File URL: ${fileMetadata.url}\n`;
+    } else {
+      urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${file.name}\n`;
+    }
+  } else {
+    console.warn(
+      "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server",
+    );
+  }
+  defaultContent += urlContent;
+
+  // Include document IDs if it's available
+  if (fileMetadata.refs) {
+    defaultContent += `Document IDs: ${fileMetadata.refs}\n`;
+  }
+  // Include sandbox file paths
+  const sandboxFilePath = `/tmp/${fileMetadata.name}`;
+  defaultContent += `Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): ${sandboxFilePath}\n`;
+
+  return defaultContent;
+}
+
 function getAllAnnotations(messages: Message[]): Annotation[] {
   return messages.flatMap((message) =>
     (message.annotations ?? []).map((annotation) =>
@@ -131,25 +160,11 @@ function convertAnnotations(messages: Message[]): MessageContentDetail[] {
       "files" in data &&
       Array.isArray(data.files)
     ) {
-      // get all CSV files and convert their whole content to one text message
-      // currently CSV files are the only files where we send the whole content - we don't use an index
-      const csvFiles: DocumentFile[] = data.files.filter(
-        (file: DocumentFile) => file.filetype === "csv",
-      );
-      if (csvFiles && csvFiles.length > 0) {
-        const csvContents = csvFiles.map((file: DocumentFile) => {
-          const fileContent = Array.isArray(file.content.value)
-            ? file.content.value.join("\n")
-            : file.content.value;
-          return "```csv\n" + fileContent + "\n```";
-        });
-        const text =
-          "Use the following CSV content:\n" + csvContents.join("\n\n");
-        content.push({
-          type: "text",
-          text,
-        });
-      }
+      const fileContent = data.files.map(getFileContent).join("\n");
+      content.push({
+        type: "text",
+        text: fileContent,
+      });
     }
   });
 

From bbf321f9b7b553653e68fa2497868fcb4cad2b54 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 11:14:24 +0700
Subject: [PATCH 20/32] support file upload for artifact in TS

---
 .../typescript/agent/tools/code-generator.ts       | 14 ++++++++++++++
 .../engines/typescript/agent/tools/interpreter.ts  |  2 +-
 .../streaming/nextjs/app/api/sandbox/route.ts      | 13 +++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/templates/components/engines/typescript/agent/tools/code-generator.ts b/templates/components/engines/typescript/agent/tools/code-generator.ts
index eedcfa515..ec6f10e83 100644
--- a/templates/components/engines/typescript/agent/tools/code-generator.ts
+++ b/templates/components/engines/typescript/agent/tools/code-generator.ts
@@ -48,11 +48,13 @@ export type CodeArtifact = {
   port: number | null;
   file_path: string;
   code: string;
+  files?: string[];
 };
 
 export type CodeGeneratorParameter = {
   requirement: string;
   oldCode?: string;
+  sandboxFiles?: string[];
 };
 
 export type CodeGeneratorToolParams = {
@@ -75,6 +77,15 @@ const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<CodeGeneratorParameter>> =
           description: "The existing code to be modified",
           nullable: true,
         },
+        sandboxFiles: {
+          type: "array",
+          description:
+            "A list of sandbox file paths. Include these files if the code requires them.",
+          items: {
+            type: "string",
+          },
+          nullable: true,
+        },
       },
       required: ["requirement"],
     },
@@ -93,6 +104,9 @@ export class CodeGeneratorTool implements BaseTool<CodeGeneratorParameter> {
         input.requirement,
         input.oldCode,
       );
+      if (input.sandboxFiles) {
+        artifact.files = input.sandboxFiles;
+      }
       return artifact as JSONValue;
     } catch (error) {
       return { isError: true };
diff --git a/templates/components/engines/typescript/agent/tools/interpreter.ts b/templates/components/engines/typescript/agent/tools/interpreter.ts
index bdb94e2d4..8d3564934 100644
--- a/templates/components/engines/typescript/agent/tools/interpreter.ts
+++ b/templates/components/engines/typescript/agent/tools/interpreter.ts
@@ -134,7 +134,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
           stdout: [],
           stderr: [],
         },
-        text: "Max retries reached",
+        text: "Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
         extraResult: [],
       };
     }
diff --git a/templates/types/streaming/nextjs/app/api/sandbox/route.ts b/templates/types/streaming/nextjs/app/api/sandbox/route.ts
index cfc200874..14bb4f906 100644
--- a/templates/types/streaming/nextjs/app/api/sandbox/route.ts
+++ b/templates/types/streaming/nextjs/app/api/sandbox/route.ts
@@ -32,6 +32,7 @@ type CodeArtifact = {
   port: number | null;
   file_path: string;
   code: string;
+  files?: string[];
 };
 
 const sandboxTimeout = 10 * 60 * 1000; // 10 minute in ms
@@ -82,6 +83,11 @@ export async function POST(req: Request) {
     }
   }
 
+  // Copy files
+  if (artifact.files) {
+    await uploadFiles(sbx, artifact.files);
+  }
+
   // Copy code to fs
   if (artifact.code && Array.isArray(artifact.code)) {
     artifact.code.forEach(async (file) => {
@@ -119,6 +125,13 @@ export async function POST(req: Request) {
   }
 }
 
+async function uploadFiles(sbx: Sandbox | CodeInterpreter, files: string[]) {
+  files.forEach(async (file) => {
+    await sbx.files.write(file, file);
+    console.log(`Copied file to ${file} in ${sbx.sandboxID}`);
+  });
+}
+
 async function downloadCellResults(
   cellResults?: Result[],
 ): Promise<Array<{ url: string; filename: string }>> {

From 852e6ec8f1a175f93c11571c21eee6694d812cd9 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 11:40:49 +0700
Subject: [PATCH 21/32] enhance file path

---
 .../typescript/agent/tools/interpreter.ts     | 24 ++++++++++---------
 .../components/routers/python/sandbox.py      |  4 ++--
 .../streaming/nextjs/app/api/sandbox/route.ts | 18 +++++++-------
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/templates/components/engines/typescript/agent/tools/interpreter.ts b/templates/components/engines/typescript/agent/tools/interpreter.ts
index 8d3564934..ec88505f1 100644
--- a/templates/components/engines/typescript/agent/tools/interpreter.ts
+++ b/templates/components/engines/typescript/agent/tools/interpreter.ts
@@ -7,8 +7,8 @@ import path from "node:path";
 
 export type InterpreterParameter = {
   code: string;
-  sandbox_files: string[];
-  retry_count: number;
+  sandboxFiles: string[];
+  retryCount: number;
 };
 
 export type InterpreterToolParams = {
@@ -22,6 +22,7 @@ export type InterpreterToolOutput = {
   logs: Logs;
   text?: string;
   extraResult: InterpreterExtraResult[];
+  retryCount?: number;
 };
 
 type InterpreterExtraType =
@@ -56,7 +57,7 @@ You have a maximum of 3 retries to get the code to run successfully.
         type: "string",
         description: "The python code to execute in a single cell.",
       },
-      sandbox_files: {
+      sandboxFiles: {
         type: "array",
         description:
           "List of local file paths to be used by the code. The tool will throw an error if a file is not found.",
@@ -64,7 +65,7 @@ You have a maximum of 3 retries to get the code to run successfully.
           type: "string",
         },
       },
-      retry_count: {
+      retryCount: {
         type: "number",
         description: "The number of times the tool has been retried",
         default: 0,
@@ -107,15 +108,15 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
       });
     }
     // upload files to sandbox
-    if (input.sandbox_files) {
-      console.log(`Uploading ${input.sandbox_files.length} files to sandbox`);
-      for (const filePath of input.sandbox_files) {
+    if (input.sandboxFiles) {
+      console.log(`Uploading ${input.sandboxFiles.length} files to sandbox`);
+      for (const filePath of input.sandboxFiles) {
         const fileName = path.basename(filePath);
         const localFilePath = path.join(this.uploadedFilesDir, fileName);
         const content = fs.readFileSync(localFilePath);
         await this.codeInterpreter?.files.write(filePath, content);
       }
-      console.log(`Uploaded ${input.sandbox_files.length} files to sandbox`);
+      console.log(`Uploaded ${input.sandboxFiles.length} files to sandbox`);
     }
     return this.codeInterpreter;
   }
@@ -124,17 +125,17 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
     input: InterpreterParameter,
   ): Promise<InterpreterToolOutput> {
     console.log(
-      `Sandbox files: ${input.sandbox_files}. Retry count: ${input.retry_count}`,
+      `Sandbox files: ${input.sandboxFiles}. Retry count: ${input.retryCount}`,
     );
 
-    if (input.retry_count >= 3) {
+    if (input.retryCount >= 3) {
       return {
         isError: true,
         logs: {
           stdout: [],
           stderr: [],
         },
-        text: "Failed to execute the code after 3 retries. Explain the error to the user and suggest a fix.",
+        text: "Max retries reached",
         extraResult: [],
       };
     }
@@ -151,6 +152,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
       logs: exec.logs,
       text: exec.text,
       extraResult,
+      retryCount: input.retryCount + 1,
     };
     return result;
   }
diff --git a/templates/components/routers/python/sandbox.py b/templates/components/routers/python/sandbox.py
index 57ef31167..9efe146fd 100644
--- a/templates/components/routers/python/sandbox.py
+++ b/templates/components/routers/python/sandbox.py
@@ -150,7 +150,7 @@ def _upload_files(
 ) -> None:
     for file_path in sandbox_files:
         file_name = os.path.basename(file_path)
-        local_file_path = f"output/uploaded/{file_name}"
+        local_file_path = os.path.join("output", "uploaded", file_name)
         with open(local_file_path, "rb") as f:
             content = f.read()
             sandbox.files.write(file_path, content)
@@ -172,7 +172,7 @@ def _download_cell_results(cell_results: Optional[List]) -> List[Dict[str, str]]
                 data = result[ext]
 
                 if ext in ["png", "svg", "jpeg", "pdf"]:
-                    file_path = f"output/tools/{uuid.uuid4()}.{ext}"
+                    file_path = os.path.join("output", "tools", f"{uuid.uuid4()}.{ext}")
                     base64_data = data
                     buffer = base64.b64decode(base64_data)
                     file_meta = save_file(content=buffer, file_path=file_path)
diff --git a/templates/types/streaming/nextjs/app/api/sandbox/route.ts b/templates/types/streaming/nextjs/app/api/sandbox/route.ts
index 14bb4f906..6bbd15177 100644
--- a/templates/types/streaming/nextjs/app/api/sandbox/route.ts
+++ b/templates/types/streaming/nextjs/app/api/sandbox/route.ts
@@ -19,6 +19,8 @@ import {
   Result,
   Sandbox,
 } from "@e2b/code-interpreter";
+import fs from "node:fs/promises";
+import path from "node:path";
 import { saveDocument } from "../chat/llamaindex/documents/helper";
 
 type CodeArtifact = {
@@ -85,7 +87,14 @@ export async function POST(req: Request) {
 
   // Copy files
   if (artifact.files) {
-    await uploadFiles(sbx, artifact.files);
+    artifact.files.forEach(async (sandboxFilePath) => {
+      const fileName = path.basename(sandboxFilePath);
+      const localFilePath = path.join("output", "uploaded", fileName);
+      const fileContent = await fs.readFile(localFilePath);
+
+      await sbx.files.write(sandboxFilePath, fileContent);
+      console.log(`Copied file to ${sandboxFilePath} in ${sbx.sandboxID}`);
+    });
   }
 
   // Copy code to fs
@@ -125,13 +134,6 @@ export async function POST(req: Request) {
   }
 }
 
-async function uploadFiles(sbx: Sandbox | CodeInterpreter, files: string[]) {
-  files.forEach(async (file) => {
-    await sbx.files.write(file, file);
-    console.log(`Copied file to ${file} in ${sbx.sandboxID}`);
-  });
-}
-
 async function downloadCellResults(
   cellResults?: Result[],
 ): Promise<Array<{ url: string; filename: string }>> {

From 5ae6b578a746a209e8f24e7d702fb2ca521bed2e Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 11:48:12 +0700
Subject: [PATCH 22/32] enhance code

---
 .../engines/typescript/agent/tools/interpreter.ts     | 11 ++++++-----
 .../llamaindex/typescript/streaming/annotations.ts    |  3 +--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/templates/components/engines/typescript/agent/tools/interpreter.ts b/templates/components/engines/typescript/agent/tools/interpreter.ts
index ec88505f1..ae386a13f 100644
--- a/templates/components/engines/typescript/agent/tools/interpreter.ts
+++ b/templates/components/engines/typescript/agent/tools/interpreter.ts
@@ -7,8 +7,8 @@ import path from "node:path";
 
 export type InterpreterParameter = {
   code: string;
-  sandboxFiles: string[];
-  retryCount: number;
+  sandboxFiles?: string[];
+  retryCount?: number;
 };
 
 export type InterpreterToolParams = {
@@ -46,7 +46,6 @@ export type InterpreterExtraResult = {
 const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<InterpreterParameter>> = {
   name: "interpreter",
   description: `Execute python code in a Jupyter notebook cell and return any result, stdout, stderr, display_data, and error.
-Execute Python code in a Jupyter notebook cell. The tool will return the result, stdout, stderr, display_data, and error.
 If the code needs to use a file, ALWAYS pass the file path in the sandbox_files argument.
 You have a maximum of 3 retries to get the code to run successfully.
 `,
@@ -64,11 +63,13 @@ You have a maximum of 3 retries to get the code to run successfully.
         items: {
           type: "string",
         },
+        nullable: true,
       },
       retryCount: {
         type: "number",
         description: "The number of times the tool has been retried",
         default: 0,
+        nullable: true,
       },
     },
     required: ["code"],
@@ -128,7 +129,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
       `Sandbox files: ${input.sandboxFiles}. Retry count: ${input.retryCount}`,
     );
 
-    if (input.retryCount >= 3) {
+    if (input.retryCount && input.retryCount >= 3) {
       return {
         isError: true,
         logs: {
@@ -152,7 +153,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
       logs: exec.logs,
       text: exec.text,
       extraResult,
-      retryCount: input.retryCount + 1,
+      retryCount: input.retryCount ? input.retryCount + 1 : 1,
     };
     return result;
   }
diff --git a/templates/components/llamaindex/typescript/streaming/annotations.ts b/templates/components/llamaindex/typescript/streaming/annotations.ts
index aaf373a3c..ca593677a 100644
--- a/templates/components/llamaindex/typescript/streaming/annotations.ts
+++ b/templates/components/llamaindex/typescript/streaming/annotations.ts
@@ -12,7 +12,6 @@ export type UploadedFileMeta = {
 
 export type DocumentFile = {
   id: string;
-  name: string;
   type: DocumentFileType;
   url: string;
   metadata: UploadedFileMeta;
@@ -74,7 +73,7 @@ function getFileContent(file: DocumentFile): string {
     if (fileMetadata.url) {
       urlContent = `File URL: ${fileMetadata.url}\n`;
     } else {
-      urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${file.name}\n`;
+      urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${fileMetadata.name}\n`;
     }
   } else {
     console.warn(

From c64e2ba465da2d1796ac5dd48173cfb88f48b57a Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 12:17:16 +0700
Subject: [PATCH 23/32] revise vercel streaming

---
 .../app/api/routers/vercel_response.py        | 128 ++++++++++--------
 1 file changed, 72 insertions(+), 56 deletions(-)

diff --git a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
index 8999f36f8..fc5f03e03 100644
--- a/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
+++ b/templates/types/streaming/fastapi/app/api/routers/vercel_response.py
@@ -45,51 +45,13 @@ async def content_generator(
         chat_data: ChatData,
         background_tasks: BackgroundTasks,
     ):
-        # Yield the events from the event handler
-        async def _event_generator():
-            async for event in event_handler.async_event_gen():
-                event_response = event.to_response()
-                if event_response is not None:
-                    yield cls.convert_data(event_response)
-
-        # Yield the text response
-        async def _chat_response_generator():
-            # Wait for the response from the chat engine
-            result = await response
-
-            # Once we got a source node, start a background task to download the files (if needed)
-            cls.process_response_nodes(result.source_nodes, background_tasks)
-
-            # Yield the source nodes
-            yield cls.convert_data(
-                {
-                    "type": "sources",
-                    "data": {
-                        "nodes": [
-                            SourceNodes.from_source_node(node).model_dump()
-                            for node in result.source_nodes
-                        ]
-                    },
-                }
-            )
-
-            final_response = ""
-            async for token in result.async_response_gen():
-                final_response += token
-                yield cls.convert_text(token)
-
-            # Generate next questions if next question prompt is configured
-            question_data = await cls._generate_next_questions(
-                chat_data.messages, final_response
-            )
-            if question_data:
-                yield cls.convert_data(question_data)
-
-            # the text_generator is the leading stream, once it's finished, also finish the event stream
-            event_handler.is_done = True
+        chat_response_generator = cls._chat_response_generator(
+            response, background_tasks, event_handler, chat_data
+        )
+        event_generator = cls._event_generator(event_handler)
 
         # Merge the chat response generator and the event generator
-        combine = stream.merge(_chat_response_generator(), _event_generator())
+        combine = stream.merge(chat_response_generator, event_generator)
         is_stream_started = False
         async with combine.stream() as streamer:
             async for output in streamer:
@@ -103,17 +65,60 @@ async def _chat_response_generator():
                 if await request.is_disconnected():
                     break
 
-    @staticmethod
-    async def _generate_next_questions(chat_history: List[Message], response: str):
-        questions = await NextQuestionSuggestion.suggest_next_questions(
-            chat_history, response
-        )
-        if questions:
-            return {
-                "type": "suggested_questions",
-                "data": questions,
+    @classmethod
+    async def _event_generator(cls, event_handler: EventCallbackHandler):
+        """
+        Yield the events from the event handler
+        """
+        async for event in event_handler.async_event_gen():
+            event_response = event.to_response()
+            if event_response is not None:
+                yield cls.convert_data(event_response)
+
+    @classmethod
+    async def _chat_response_generator(
+        cls,
+        response: Awaitable[StreamingAgentChatResponse],
+        background_tasks: BackgroundTasks,
+        event_handler: EventCallbackHandler,
+        chat_data: ChatData,
+    ):
+        """
+        Yield the text response and source nodes from the chat engine
+        """
+        # Wait for the response from the chat engine
+        result = await response
+
+        # Once we got a source node, start a background task to download the files (if needed)
+        cls._process_response_nodes(result.source_nodes, background_tasks)
+
+        # Yield the source nodes
+        yield cls.convert_data(
+            {
+                "type": "sources",
+                "data": {
+                    "nodes": [
+                        SourceNodes.from_source_node(node).model_dump()
+                        for node in result.source_nodes
+                    ]
+                },
             }
-        return None
+        )
+
+        final_response = ""
+        async for token in result.async_response_gen():
+            final_response += token
+            yield cls.convert_text(token)
+
+        # Generate next questions if next question prompt is configured
+        question_data = await cls._generate_next_questions(
+            chat_data.messages, final_response
+        )
+        if question_data:
+            yield cls.convert_data(question_data)
+
+        # the text_generator is the leading stream, once it's finished, also finish the event stream
+        event_handler.is_done = True
 
     @classmethod
     def convert_text(cls, token: str):
@@ -126,9 +131,8 @@ def convert_data(cls, data: dict):
         data_str = json.dumps(data)
         return f"{cls.DATA_PREFIX}[{data_str}]\n"
 
-    @classmethod
-    def process_response_nodes(
-        cls,
+    @staticmethod
+    def _process_response_nodes(
         source_nodes: List[NodeWithScore],
         background_tasks: BackgroundTasks,
     ):
@@ -144,3 +148,15 @@ def process_response_nodes(
                 "LlamaCloud is not configured. Skipping post processing of nodes"
             )
             pass
+
+    @staticmethod
+    async def _generate_next_questions(chat_history: List[Message], response: str):
+        questions = await NextQuestionSuggestion.suggest_next_questions(
+            chat_history, response
+        )
+        if questions:
+            return {
+                "type": "suggested_questions",
+                "data": questions,
+            }
+        return None

From 36cdb1e51cbaa673aa3eeb1c14b0a671314b3686 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 12:20:57 +0700
Subject: [PATCH 24/32] remove redundant id

---
 .../components/llamaindex/typescript/streaming/annotations.ts    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/templates/components/llamaindex/typescript/streaming/annotations.ts b/templates/components/llamaindex/typescript/streaming/annotations.ts
index ca593677a..f8de88f8b 100644
--- a/templates/components/llamaindex/typescript/streaming/annotations.ts
+++ b/templates/components/llamaindex/typescript/streaming/annotations.ts
@@ -11,7 +11,6 @@ export type UploadedFileMeta = {
 };
 
 export type DocumentFile = {
-  id: string;
   type: DocumentFileType;
   url: string;
   metadata: UploadedFileMeta;

From e0921fec2aec9270478ef92750d0964f7e27305c Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 14:24:26 +0700
Subject: [PATCH 25/32] add show file widget to the

---
 helpers/tools.ts                                  |  2 +-
 .../ui/chat/chat-message/chat-sources.tsx         | 12 +++++++++---
 .../components/ui/chat/chat-message/markdown.tsx  | 15 ++++++++++++++-
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/helpers/tools.ts b/helpers/tools.ts
index c7349569c..262e71b1d 100644
--- a/helpers/tools.ts
+++ b/helpers/tools.ts
@@ -139,7 +139,7 @@ For better results, you can specify the region parameter to get results from a s
     dependencies: [
       {
         name: "e2b_code_interpreter",
-        version: "^0.0.11b38",
+        version: "0.0.11b38",
       },
     ],
     supportedFrameworks: ["fastapi", "express", "nextjs"],
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
index 929e199ca..57b576db4 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
@@ -97,10 +97,16 @@ export function SourceNumberButton({
   );
 }
 
-function DocumentInfo({ document }: { document: Document }) {
-  if (!document.sources.length) return null;
+export function DocumentInfo({ document }: { document: Document }) {
   const { url, sources } = document;
-  const fileName = sources[0].metadata.file_name as string | undefined;
+  let fileName: string | undefined;
+  if (sources.length > 0) {
+    fileName = sources[0].metadata.file_name as string | undefined;
+  } else {
+    // Extract filename from URL if sources is empty
+    const urlParts = url.split("/");
+    fileName = urlParts[urlParts.length - 1];
+  }
   const fileExt = fileName?.split(".").pop();
   const fileImage = fileExt ? FileIcon[fileExt as DocumentFileType] : null;
 
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
index 48aeebf6e..bdee5c3f7 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
@@ -6,7 +6,8 @@ import remarkGfm from "remark-gfm";
 import remarkMath from "remark-math";
 
 import { SourceData } from "..";
-import { SourceNumberButton } from "./chat-sources";
+import { useClientConfig } from "../hooks/use-config";
+import { DocumentInfo, SourceNumberButton } from "./chat-sources";
 import { CodeBlock } from "./codeblock";
 
 const MemoizedReactMarkdown: FC<Options> = memo(
@@ -78,6 +79,7 @@ export default function Markdown({
   sources?: SourceData;
 }) {
   const processedContent = preprocessContent(content, sources);
+  const { backend } = useClientConfig();
 
   return (
     <MemoizedReactMarkdown
@@ -119,6 +121,17 @@ export default function Markdown({
           );
         },
         a({ href, children }) {
+          // If href starts with http://localhost:8000/api/files, render DocumentPreview
+          if (href?.startsWith(backend + "/api/files")) {
+            return (
+              <DocumentInfo
+                document={{
+                  url: href,
+                  sources: [],
+                }}
+              />
+            );
+          }
           // If a text link starts with 'citation:', then render it as a citation reference
           if (
             Array.isArray(children) &&

From a3c1c5547de33d51c10bda2d6a5d23bbd2590502 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 15:09:31 +0700
Subject: [PATCH 26/32] allow upload file with empty index store

---
 .../typescript/documents/pipeline.ts          | 20 ++++++++++++++-----
 .../llamaindex/typescript/documents/upload.ts |  2 +-
 .../nextjs/app/api/chat/upload/route.ts       |  5 -----
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/templates/components/llamaindex/typescript/documents/pipeline.ts b/templates/components/llamaindex/typescript/documents/pipeline.ts
index 6f9589cd2..01b52fd5d 100644
--- a/templates/components/llamaindex/typescript/documents/pipeline.ts
+++ b/templates/components/llamaindex/typescript/documents/pipeline.ts
@@ -7,7 +7,7 @@ import {
 } from "llamaindex";
 
 export async function runPipeline(
-  currentIndex: VectorStoreIndex,
+  currentIndex: VectorStoreIndex | null,
   documents: Document[],
 ) {
   // Use ingestion pipeline to process the documents into nodes and add them to the vector store
@@ -21,8 +21,18 @@ export async function runPipeline(
     ],
   });
   const nodes = await pipeline.run({ documents });
-  await currentIndex.insertNodes(nodes);
-  currentIndex.storageContext.docStore.persist();
-  console.log("Added nodes to the vector store.");
-  return documents.map((document) => document.id_);
+  if (currentIndex) {
+    await currentIndex.insertNodes(nodes);
+    currentIndex.storageContext.docStore.persist();
+    console.log("Added nodes to the vector store.");
+    return documents.map((document) => document.id_);
+  } else {
+    // Initialize a new index with the documents
+    const newIndex = await VectorStoreIndex.fromDocuments(documents);
+    newIndex.storageContext.docStore.persist();
+    console.log(
+      "Got empty index, created new index with the uploaded documents",
+    );
+    return documents.map((document) => document.id_);
+  }
 }
diff --git a/templates/components/llamaindex/typescript/documents/upload.ts b/templates/components/llamaindex/typescript/documents/upload.ts
index 7cbda1d04..158b05a1a 100644
--- a/templates/components/llamaindex/typescript/documents/upload.ts
+++ b/templates/components/llamaindex/typescript/documents/upload.ts
@@ -6,7 +6,7 @@ import { FileMetadata, parseFile, storeFile } from "./helper";
 import { runPipeline } from "./pipeline";
 
 export async function uploadDocument(
-  index: VectorStoreIndex | LlamaCloudIndex,
+  index: VectorStoreIndex | LlamaCloudIndex | null,
   filename: string,
   raw: string,
 ): Promise<FileMetadata> {
diff --git a/templates/types/streaming/nextjs/app/api/chat/upload/route.ts b/templates/types/streaming/nextjs/app/api/chat/upload/route.ts
index 001d3a0ef..382a94c93 100644
--- a/templates/types/streaming/nextjs/app/api/chat/upload/route.ts
+++ b/templates/types/streaming/nextjs/app/api/chat/upload/route.ts
@@ -23,11 +23,6 @@ export async function POST(request: NextRequest) {
       );
     }
     const index = await getDataSource(params);
-    if (!index) {
-      throw new Error(
-        `StorageContext is empty - call 'npm run generate' to generate the storage first`,
-      );
-    }
     return NextResponse.json(await uploadDocument(index, filename, base64));
   } catch (error) {
     console.error("[Upload API]", error);

From 7d9dee2b64034bca1cfbf5bfb11d2fa9a2da2dc7 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Tue, 15 Oct 2024 15:29:17 +0700
Subject: [PATCH 27/32] add data scientist use case

---
 questions/simple.ts | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/questions/simple.ts b/questions/simple.ts
index 6a3477eee..25cc89bf4 100644
--- a/questions/simple.ts
+++ b/questions/simple.ts
@@ -5,7 +5,13 @@ import { getTools } from "../helpers/tools";
 import { ModelConfig, TemplateFramework } from "../helpers/types";
 import { PureQuestionArgs, QuestionResults } from "./types";
 import { askPostInstallAction, questionHandlers } from "./utils";
-type AppType = "rag" | "code_artifact" | "multiagent" | "extractor";
+
+type AppType =
+  | "rag"
+  | "code_artifact"
+  | "multiagent"
+  | "extractor"
+  | "data_scientist";
 
 type SimpleAnswers = {
   appType: AppType;
@@ -25,6 +31,7 @@ export const askSimpleQuestions = async (
       message: "What app do you want to build?",
       choices: [
         { title: "Agentic RAG", value: "rag" },
+        { title: "Data Scientist", value: "data_scientist" },
         { title: "Code Artifact Agent", value: "code_artifact" },
         { title: "Multi-Agent Report Gen", value: "multiagent" },
         { title: "Structured extraction", value: "extractor" },
@@ -109,6 +116,12 @@ const convertAnswers = (answers: SimpleAnswers): QuestionResults => {
       frontend: true,
       dataSources: [EXAMPLE_FILE],
     },
+    data_scientist: {
+      template: "streaming",
+      tools: getTools(["interpreter", "document_generator"]),
+      frontend: true,
+      dataSources: [],
+    },
     code_artifact: {
       template: "streaming",
       tools: getTools(["artifact"]),

From 3b91e7b71cb2931f56a882d7f8786a5cc6df6dca Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Tue, 15 Oct 2024 15:50:08 +0700
Subject: [PATCH 28/32] use GPT4o model for data scientist and code artifact

---
 questions/simple.ts | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/questions/simple.ts b/questions/simple.ts
index 25cc89bf4..29930c1ae 100644
--- a/questions/simple.ts
+++ b/questions/simple.ts
@@ -18,7 +18,6 @@ type SimpleAnswers = {
   language: TemplateFramework;
   useLlamaCloud: boolean;
   llamaCloudKey?: string;
-  modelConfig: ModelConfig;
 };
 
 export const askSimpleQuestions = async (
@@ -87,28 +86,36 @@ export const askSimpleQuestions = async (
     }
   }
 
-  const modelConfig = await askModelConfig({
-    openAiKey: args.openAiKey,
-    askModels: args.askModels ?? false,
-    framework: language,
-  });
-
-  const results = convertAnswers({
+  const results = await convertAnswers(args, {
     appType,
     language,
     useLlamaCloud,
     llamaCloudKey,
-    modelConfig,
   });
 
   results.postInstallAction = await askPostInstallAction(results);
   return results;
 };
 
-const convertAnswers = (answers: SimpleAnswers): QuestionResults => {
+const convertAnswers = async (
+  args: PureQuestionArgs,
+  answers: SimpleAnswers,
+): Promise<QuestionResults> => {
+  const MODEL_GPT4o: ModelConfig = {
+    provider: "openai",
+    apiKey: args.openAiKey,
+    model: "gpt-4o",
+    embeddingModel: "text-embedding-3-large",
+    dimensions: 1536,
+    isConfigured(): boolean {
+      return !!args.openAiKey;
+    },
+  };
   const lookup: Record<
     AppType,
-    Pick<QuestionResults, "template" | "tools" | "frontend" | "dataSources">
+    Pick<QuestionResults, "template" | "tools" | "frontend" | "dataSources"> & {
+      modelConfig?: ModelConfig;
+    }
   > = {
     rag: {
       template: "streaming",
@@ -121,12 +128,14 @@ const convertAnswers = (answers: SimpleAnswers): QuestionResults => {
       tools: getTools(["interpreter", "document_generator"]),
       frontend: true,
       dataSources: [],
+      modelConfig: MODEL_GPT4o,
     },
     code_artifact: {
       template: "streaming",
       tools: getTools(["artifact"]),
       frontend: true,
       dataSources: [],
+      modelConfig: MODEL_GPT4o,
     },
     multiagent: {
       template: "multiagent",
@@ -153,11 +162,16 @@ const convertAnswers = (answers: SimpleAnswers): QuestionResults => {
     llamaCloudKey: answers.llamaCloudKey,
     useLlamaParse: answers.useLlamaCloud,
     llamapack: "",
-    postInstallAction: "none",
     vectorDb: answers.useLlamaCloud ? "llamacloud" : "none",
-    modelConfig: answers.modelConfig,
     observability: "none",
     ...results,
+    modelConfig:
+      results.modelConfig ??
+      (await askModelConfig({
+        openAiKey: args.openAiKey,
+        askModels: args.askModels ?? false,
+        framework: answers.language,
+      })),
     frontend: answers.language === "nextjs" ? false : results.frontend,
   };
 };

From 954113edb6adedd975f3614f43f22f384e834736 Mon Sep 17 00:00:00 2001
From: leehuwuj <leehuwuj@gmail.com>
Date: Tue, 15 Oct 2024 16:56:30 +0700
Subject: [PATCH 29/32] update comments

---
 .../nextjs/app/components/ui/chat/chat-message/markdown.tsx | 2 +-
 .../types/streaming/nextjs/app/components/ui/chat/index.ts  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
index d27052659..3775a6f76 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
@@ -122,7 +122,7 @@ export default function Markdown({
           );
         },
         a({ href, children }) {
-          // If href starts with http://localhost:8000/api/files, render DocumentPreview
+          // If href starts with `{backend}/api/files`, then it's a local document and we use DocumenInfo for rendering
           if (href?.startsWith(backend + "/api/files")) {
             return (
               <DocumentInfo
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
index e8b55ac25..2c317d143 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
@@ -23,16 +23,16 @@ export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";
 
 export type UploadedFileMeta = {
   id: string;
-  name: string;
+  name: string; // The uploaded file name in the backend (including uuid and sanitized)
   url?: string;
   refs?: string[];
 };
 
 export type DocumentFile = {
-  filename: string;
+  filename: string; // The original file name
   filesize: number;
   filetype: DocumentFileType;
-  metadata?: UploadedFileMeta;
+  metadata?: UploadedFileMeta; // undefined when the file is not uploaded yet
 };
 
 export type DocumentFileData = {

From 624aea70e88a43fbf508e979a79d788a49e63aa1 Mon Sep 17 00:00:00 2001
From: Marcus Schiesser <mail@marcusschiesser.de>
Date: Tue, 15 Oct 2024 17:16:43 +0700
Subject: [PATCH 30/32] use previewcard to render documents

---
 .../ui/chat/chat-message/chat-sources.tsx     | 81 ++++++-------------
 .../ui/chat/chat-message/markdown.tsx         | 27 ++++---
 .../nextjs/app/components/ui/chat/index.ts    |  6 ++
 .../app/components/ui/document-preview.tsx    | 32 +++++---
 4 files changed, 71 insertions(+), 75 deletions(-)

diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
index 57b576db4..14fa25727 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
@@ -1,8 +1,7 @@
-import { Check, Copy, FileText } from "lucide-react";
-import Image from "next/image";
+import { Check, Copy } from "lucide-react";
 import { useMemo } from "react";
 import { Button } from "../../button";
-import { FileIcon } from "../../document-preview";
+import { PreviewCard } from "../../document-preview";
 import {
   HoverCard,
   HoverCardContent,
@@ -49,13 +48,7 @@ export function ChatSources({ data }: { data: SourceData }) {
   );
 }
 
-export function SourceInfo({
-  node,
-  index,
-}: {
-  node?: SourceNode;
-  index: number;
-}) {
+function SourceInfo({ node, index }: { node?: SourceNode; index: number }) {
   if (!node) return <SourceNumberButton index={index} />;
   return (
     <HoverCard>
@@ -97,55 +90,33 @@ export function SourceNumberButton({
   );
 }
 
-export function DocumentInfo({ document }: { document: Document }) {
+export function DocumentInfo({
+  document,
+  className,
+}: {
+  document: Document;
+  className?: string;
+}) {
   const { url, sources } = document;
-  let fileName: string | undefined;
-  if (sources.length > 0) {
-    fileName = sources[0].metadata.file_name as string | undefined;
-  } else {
-    // Extract filename from URL if sources is empty
-    const urlParts = url.split("/");
-    fileName = urlParts[urlParts.length - 1];
-  }
-  const fileExt = fileName?.split(".").pop();
-  const fileImage = fileExt ? FileIcon[fileExt as DocumentFileType] : null;
+  // Extract filename from URL
+  const urlParts = url.split("/");
+  const fileName = urlParts.length > 0 ? urlParts[urlParts.length - 1] : url;
+  const fileExt = fileName?.split(".").pop() as DocumentFileType | undefined;
+
+  const previewFile = {
+    filename: fileName,
+    filetype: fileExt,
+  };
 
   const DocumentDetail = (
-    <div
-      key={url}
-      className="h-28 w-48 flex flex-col justify-between p-4 border rounded-md shadow-md cursor-pointer"
-    >
-      <p
-        title={fileName}
-        className={cn(
-          fileName ? "truncate" : "text-blue-900 break-words",
-          "text-left",
-        )}
-      >
-        {fileName ?? url}
-      </p>
-      <div className="flex justify-between items-center">
-        <div className="space-x-2 flex">
-          {sources.map((node: SourceNode, index: number) => {
-            return (
-              <div key={node.id}>
-                <SourceInfo node={node} index={index} />
-              </div>
-            );
-          })}
-        </div>
-        {fileImage ? (
-          <div className="relative h-8 w-8 shrink-0 overflow-hidden rounded-md">
-            <Image
-              className="h-full w-auto"
-              priority
-              src={fileImage}
-              alt="Icon"
-            />
+    <div className={`relative ${className}`}>
+      <PreviewCard className={"cursor-pointer"} file={previewFile} />
+      <div className="absolute bottom-2 left-2 space-x-2 flex">
+        {sources.map((node: SourceNode, index: number) => (
+          <div key={node.id}>
+            <SourceInfo node={node} index={index} />
           </div>
-        ) : (
-          <FileText className="text-gray-500" />
-        )}
+        ))}
       </div>
     </div>
   );
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
index 3775a6f76..7748176f3 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
@@ -5,7 +5,7 @@ import rehypeKatex from "rehype-katex";
 import remarkGfm from "remark-gfm";
 import remarkMath from "remark-math";
 
-import { SourceData } from "..";
+import { DOCUMENT_FILE_TYPES, DocumentFileType, SourceData } from "..";
 import { useClientConfig } from "../hooks/use-config";
 import { DocumentInfo, SourceNumberButton } from "./chat-sources";
 import { CodeBlock } from "./codeblock";
@@ -124,14 +124,23 @@ export default function Markdown({
         a({ href, children }) {
           // If href starts with `{backend}/api/files`, then it's a local document and we use DocumenInfo for rendering
           if (href?.startsWith(backend + "/api/files")) {
-            return (
-              <DocumentInfo
-                document={{
-                  url: href,
-                  sources: [],
-                }}
-              />
-            );
+            // Check if the file is document file type
+            const fileExtension = href.split(".").pop()?.toLowerCase();
+
+            if (
+              fileExtension &&
+              DOCUMENT_FILE_TYPES.includes(fileExtension as DocumentFileType)
+            ) {
+              return (
+                <DocumentInfo
+                  document={{
+                    url: href,
+                    sources: [],
+                  }}
+                  className="mb-2 mt-2"
+                />
+              );
+            }
           }
           // If a text link starts with 'citation:', then render it as a citation reference
           if (
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
index 2c317d143..eedd4ceb2 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/index.ts
@@ -20,6 +20,12 @@ export type ImageData = {
 };
 
 export type DocumentFileType = "csv" | "pdf" | "txt" | "docx";
+export const DOCUMENT_FILE_TYPES: DocumentFileType[] = [
+  "csv",
+  "pdf",
+  "txt",
+  "docx",
+];
 
 export type UploadedFileMeta = {
   id: string;
diff --git a/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx b/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
index 170c67cf3..b0f9bd900 100644
--- a/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/document-preview.tsx
@@ -37,7 +37,7 @@ export function DocumentPreview(props: DocumentPreviewProps) {
     <Drawer direction="left">
       <DrawerTrigger asChild>
         <div>
-          <PreviewCard {...props} />
+          <PreviewCard className="cursor-pointer" {...props} />
         </div>
       </DrawerTrigger>
       <DrawerContent className="w-3/5 mt-24 h-full max-h-[96%] ">
@@ -71,31 +71,41 @@ export const FileIcon: Record<DocumentFileType, string> = {
   txt: TxtIcon,
 };
 
-function PreviewCard(props: DocumentPreviewProps) {
-  const { onRemove, file } = props;
+export function PreviewCard(props: {
+  file: {
+    filename: string;
+    filesize?: number;
+    filetype?: DocumentFileType;
+  };
+  onRemove?: () => void;
+  className?: string;
+}) {
+  const { onRemove, file, className } = props;
   return (
     <div
       className={cn(
         "p-2 w-60 max-w-60 bg-secondary rounded-lg text-sm relative",
-        file.metadata?.refs?.length ? "" : "cursor-pointer",
+        className,
       )}
     >
       <div className="flex flex-row items-center gap-2">
-        <div className="relative h-8 w-8 shrink-0 overflow-hidden rounded-md">
+        <div className="relative h-8 w-8 shrink-0 overflow-hidden rounded-md flex items-center justify-center">
           <Image
-            className="h-full w-auto"
+            className="h-full w-auto object-contain"
             priority
-            src={FileIcon[file.filetype]}
+            src={FileIcon[file.filetype || "txt"]}
             alt="Icon"
           />
         </div>
         <div className="overflow-hidden">
           <div className="truncate font-semibold">
-            {file.filename} ({inKB(file.filesize)} KB)
-          </div>
-          <div className="truncate text-token-text-tertiary flex items-center gap-2">
-            <span>{file.filetype.toUpperCase()} File</span>
+            {file.filename} {file.filesize ? `(${inKB(file.filesize)} KB)` : ""}
           </div>
+          {file.filetype && (
+            <div className="truncate text-token-text-tertiary flex items-center gap-2">
+              <span>{file.filetype.toUpperCase()} File</span>
+            </div>
+          )}
         </div>
       </div>
       {onRemove && (

From 788fab091f53511b00315ced924793b4720b96b0 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Wed, 16 Oct 2024 12:37:46 +0700
Subject: [PATCH 31/32] fix: UI overlap, key warning, wrong filename and url in
 markdown

---
 .../streaming/nextjs/app/components/ui/chat/chat-input.tsx | 4 ++--
 .../app/components/ui/chat/chat-message/chat-files.tsx     | 7 +++++--
 .../app/components/ui/chat/chat-message/chat-sources.tsx   | 2 +-
 .../app/components/ui/chat/chat-message/markdown.tsx       | 2 +-
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
index 417809c13..ce2b02d06 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-input.tsx
@@ -95,9 +95,9 @@ export default function ChatInput(
       )}
       {files.length > 0 && (
         <div className="flex gap-4 w-full overflow-auto py-2">
-          {files.map((file) => (
+          {files.map((file, index) => (
             <DocumentPreview
-              key={file.metadata?.id}
+              key={file.metadata?.id ?? `${file.filename}-${index}`}
               file={file}
               onRemove={() => removeDoc(file)}
             />
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
index 42c67598b..085963d2b 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-files.tsx
@@ -5,8 +5,11 @@ export function ChatFiles({ data }: { data: DocumentFileData }) {
   if (!data.files.length) return null;
   return (
     <div className="flex gap-2 items-center">
-      {data.files.map((file) => (
-        <DocumentPreview key={file.metadata?.id} file={file} />
+      {data.files.map((file, index) => (
+        <DocumentPreview
+          key={file.metadata?.id ?? `${file.filename}-${index}`}
+          file={file}
+        />
       ))}
     </div>
   );
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
index 14fa25727..03a1a6278 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/chat-sources.tsx
@@ -111,7 +111,7 @@ export function DocumentInfo({
   const DocumentDetail = (
     <div className={`relative ${className}`}>
       <PreviewCard className={"cursor-pointer"} file={previewFile} />
-      <div className="absolute bottom-2 left-2 space-x-2 flex">
+      <div className="absolute bottom-2 right-2 space-x-2 flex">
         {sources.map((node: SourceNode, index: number) => (
           <div key={node.id}>
             <SourceInfo node={node} index={index} />
diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
index 7748176f3..8e0845a4e 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
@@ -134,7 +134,7 @@ export default function Markdown({
               return (
                 <DocumentInfo
                   document={{
-                    url: href,
+                    url: new URL(decodeURIComponent(href)).href,
                     sources: [],
                   }}
                   className="mb-2 mt-2"

From 0f56092eb46dfc1dd147539bb4e0b362adb8ba91 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Wed, 16 Oct 2024 12:56:48 +0700
Subject: [PATCH 32/32] use div as tag wrapper for message

---
 .../nextjs/app/components/ui/chat/chat-message/markdown.tsx     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
index 8e0845a4e..8682a802e 100644
--- a/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
+++ b/templates/types/streaming/nextjs/app/components/ui/chat/chat-message/markdown.tsx
@@ -88,7 +88,7 @@ export default function Markdown({
       rehypePlugins={[rehypeKatex as any]}
       components={{
         p({ children }) {
-          return <p className="mb-2 last:mb-0">{children}</p>;
+          return <div className="mb-2 last:mb-0">{children}</div>;
         },
         code({ node, inline, className, children, ...props }) {
           if (children.length) {