From 054f982d33bf31a8d63dfd6d73bc4534455453ae Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Wed, 30 Apr 2025 16:39:58 -0700 Subject: [PATCH 1/6] Move all file processing from UI to DocSum backend service Signed-off-by: Melanie Buehler --- DocSum/docker_compose/amd/gpu/rocm/README.md | 22 +- .../docker_compose/intel/cpu/xeon/README.md | 24 +- .../docker_compose/intel/hpu/gaudi/README.md | 22 +- DocSum/docsum.py | 44 ++-- DocSum/ui/gradio/docsum_ui_gradio.py | 214 +++++++----------- 5 files changed, 177 insertions(+), 149 deletions(-) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index fe37f39d57..3b027bb92c 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -241,11 +241,10 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ ### Query with audio and video -> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI. - Audio: ```bash +# Send base64 string curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' @@ -257,11 +256,21 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=audio" \ + -F "messages=" \ + -F "files=@/path to your file (.mp3, .wav)" \ + -F "max_tokens=32" \ + -F "language=en" ``` Video: ```bash +# Send base64 string curl -X POST http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "video", "messages": "convert your video to base64 data type"}' @@ -273,6 +282,15 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=video" \ + -F "messages=" \ + -F "files=@/path to your file (.mp4)" \ + -F "max_tokens=32" \ + -F "language=en" ``` ### Query with long context diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 0930ab227e..b47bb292b5 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -156,16 +156,15 @@ curl http://${host_ip}:8888/v1/docsum \ -F "messages=" \ -F "files=@/path to your file (.txt, .docx, .pdf)" \ -F "max_tokens=32" \ - -F "language=en" \ + -F "language=en" ``` ### Query with audio and video -> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI. - Audio: ```bash +# Send base64 string curl -X POST http://${host_ip}:8888/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' @@ -177,11 +176,21 @@ curl http://${host_ip}:8888/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${host_ip}:8888/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=audio" \ + -F "messages=" \ + -F "files=@/path to your file (.mp3, .wav)" \ + -F "max_tokens=32" \ + -F "language=en" ``` Video: ```bash +# Send base64 string curl -X POST http://${host_ip}:8888/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "video", "messages": "convert your video to base64 data type"}' @@ -193,6 +202,15 @@ curl http://${host_ip}:8888/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${host_ip}:8888/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=video" \ + -F "messages=" \ + -F "files=@/path to your file (.mp4)" \ + -F "max_tokens=32" \ + -F "language=en" ``` ### Query with long context diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 7b552fd5b8..256bea262d 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -163,11 +163,10 @@ curl http://${host_ip}:8888/v1/docsum \ ### Query with audio and video -> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI. - Audio: ```bash +# Send base64 string curl -X POST http://${host_ip}:8888/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' @@ -179,11 +178,21 @@ curl http://${host_ip}:8888/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${host_ip}:8888/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=audio" \ + -F "messages=" \ + -F "files=@/path to your file (.mp3, .wav)" \ + -F "max_tokens=32" \ + -F "language=en" ``` Video: ```bash +# Send base64 string curl -X POST http://${host_ip}:8888/v1/docsum \ -H "Content-Type: application/json" \ -d '{"type": "video", "messages": "convert your video to base64 data type"}' @@ -195,6 +204,15 @@ curl http://${host_ip}:8888/v1/docsum \ -F "max_tokens=32" \ -F "language=en" \ -F "stream=True" + +# Upload file +curl http://${host_ip}:8888/v1/docsum \ + -H "Content-Type: multipart/form-data" \ + -F "type=video" \ + -F "messages=" \ + -F "files=@/path to your file (.mp4)" \ + -F "max_tokens=32" \ + -F "language=en" ``` ### Query with long context diff --git a/DocSum/docsum.py b/DocSum/docsum.py index 34e58c1df0..1be5e322a7 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -63,6 +63,20 @@ def read_pdf(file): return docs +def encode_file_to_base64(file_path): + """Encode the content of a file to a base64 string. + + Args: + file_path (str): The path to the file to be encoded. + + Returns: + str: The base64 encoded string of the file content. + """ + with open(file_path, "rb") as f: + base64_str = base64.b64encode(f.read()).decode("utf-8") + return base64_str + + def video2audio( video_base64: str, ) -> str: @@ -163,7 +177,6 @@ def add_remote_service(self): async def handle_request(self, request: Request, files: List[UploadFile] = File(default=None)): """Accept pure text, or files .txt/.pdf.docx, audio/video base64 string.""" - if "application/json" in request.headers.get("content-type"): data = await request.json() stream_opt = data.get("stream", True) @@ -193,25 +206,24 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File( uid = str(uuid.uuid4()) file_path = f"/tmp/{uid}" - if data_type is not None and data_type in ["audio", "video"]: - raise ValueError( - "Audio and Video file uploads are not supported in docsum with curl request, \ - please use the UI or pass base64 string of the content directly." - ) + import aiofiles - else: - import aiofiles - - async with aiofiles.open(file_path, "wb") as f: - await f.write(await file.read()) + async with aiofiles.open(file_path, "wb") as f: + await f.write(await file.read()) + if data_type == "text": docs = read_text_from_file(file, file_path) - os.remove(file_path) + elif data_type in ["audio", "video"]: + docs = encode_file_to_base64(file_path) + else: + raise ValueError(f"Data type not recognized: {data_type}") + + os.remove(file_path) - if isinstance(docs, list): - file_summaries.extend(docs) - else: - file_summaries.append(docs) + if isinstance(docs, list): + file_summaries.extend(docs) + else: + file_summaries.append(docs) if file_summaries: prompt = handle_message(chat_request.messages) + "\n".join(file_summaries) diff --git a/DocSum/ui/gradio/docsum_ui_gradio.py b/DocSum/ui/gradio/docsum_ui_gradio.py index 5bb9a7091c..a477b31ff9 100644 --- a/DocSum/ui/gradio/docsum_ui_gradio.py +++ b/DocSum/ui/gradio/docsum_ui_gradio.py @@ -22,76 +22,12 @@ class DocSumUI: def __init__(self): """Initialize the DocSumUI class with accepted file types, headers, and backend service endpoint.""" - self.ACCEPTED_FILE_TYPES = ["pdf", "doc", "docx"] + self.ACCEPTED_TEXT_FILE_TYPES = [".pdf", ".doc", ".docx"] + self.ACCEPTED_AUDIO_FILE_TYPES = [".mp3", ".wav"] + self.ACCEPTED_VIDEO_FILE_TYPES = [".mp4"] self.HEADERS = {"Content-Type": "application/json"} self.BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/docsum") - def encode_file_to_base64(self, file_path): - """Encode the content of a file to a base64 string. - - Args: - file_path (str): The path to the file to be encoded. - - Returns: - str: The base64 encoded string of the file content. - """ - logger.info(">>> Encoding file to base64: %s", file_path) - with open(file_path, "rb") as f: - base64_str = base64.b64encode(f.read()).decode("utf-8") - return base64_str - - def read_file(self, file): - """Read and process the content of a file. - - Args: - file (file-like object): The file to be read. - - Returns: - str: The content of the file or an error message if the file type is unsupported. - """ - self.page_content = "" - self.pages = [] - - if file.name.endswith(".pdf"): - loader = PyPDFLoader(file) - elif file.name.endswith((".doc", ".docx")): - loader = Docx2txtLoader(file) - else: - msg = f"Unsupported file type '{file.name}'. Choose from {self.ACCEPTED_FILE_TYPES}" - logger.error(msg) - return msg - - for page in loader.lazy_load(): - self.page_content += page.page_content - - return self.page_content - - def read_audio_file(self, file): - """Read and process the content of an audio file. - - Args: - file (file-like object): The audio file to be read. - - Returns: - str: The base64 encoded content of the audio file. - """ - logger.info(">>> Reading audio file: %s", file.name) - base64_str = self.encode_file_to_base64(file) - return base64_str - - def read_video_file(self, file): - """Read and process the content of a video file. - - Args: - file (file-like object): The video file to be read. - - Returns: - str: The base64 encoded content of the video file. - """ - logger.info(">>> Reading video file: %s", file.name) - base64_str = self.encode_file_to_base64(file) - return base64_str - def is_valid_url(self, url): try: result = urlparse(url) @@ -128,78 +64,107 @@ def read_url(self, url): return self.page_content - def generate_summary(self, doc_content, document_type="text"): + def process_response(self, response): + if response.status_code == 200: + try: + # Check if the specific log path is in the response text + if "/logs/LLMChain/final_output" in response.text: + # Extract the relevant part of the response + temp = ast.literal_eval( + [ + i.split("data: ")[1] + for i in response.text.split("\n\n") + if "/logs/LLMChain/final_output" in i + ][0] + )["ops"] + + # Find the final output value + final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0] + return final_output["text"] + else: + # Perform string replacements to clean the response text + cleaned_text = response.text + replacements = [ + ("'\n\ndata: b'", ""), + ("data: b' ", ""), + ("'\n\ndata: [DONE]\n\n", ""), + ("\n\ndata: b", ""), + ("'\n\n", ""), + ("'\n", ""), + ('''\'"''', ""), + ] + for old, new in replacements: + cleaned_text = cleaned_text.replace(old, new) + return cleaned_text + except (IndexError, KeyError, ValueError) as e: + # Handle potential errors during parsing + logger.error("Error parsing response: %s", e) + return response.text + + def generate_summary(self, document, document_type="text"): """Generate a summary for the given document content. Args: - doc_content (str): The content of the document. + document (str): The content or path of the document. document_type (str): The type of the document (default is "text"). Returns: str: The generated summary or an error message. """ - logger.info(">>> BACKEND_SERVICE_ENDPOINT - %s", self.BACKEND_SERVICE_ENDPOINT) - data = {"max_tokens": 256, "type": document_type, "messages": doc_content} + data = {"max_tokens": 256, "type": document_type, "messages": ""} + + if os.path.exists(document): + file_header = "text/plain" + file_ext = os.path.splitext(document)[-1] + if file_ext == ".pdf": + file_header = "application/pdf" + elif file_ext in [".doc", ".docx"]: + file_header = "application/octet-stream" + elif file_ext in self.ACCEPTED_AUDIO_FILE_TYPES + self.ACCEPTED_VIDEO_FILE_TYPES: + file_header = f"{document_type}/{file_ext[-3:]}" + files = {"files": (os.path.basename(document), open(document, "rb"), file_header)} + try: + response = requests.post( + url=self.BACKEND_SERVICE_ENDPOINT, + headers={}, + files=files, + data=data, + proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]}, + ) - try: - response = requests.post( - url=self.BACKEND_SERVICE_ENDPOINT, - headers=self.HEADERS, - data=json.dumps(data), - proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]}, - ) + return self.process_response(response) + + except requests.exceptions.RequestException as e: + logger.error("Request exception: %s", e) + return str(e) + + else: + data["messages"] = document + try: + response = requests.post( + url=self.BACKEND_SERVICE_ENDPOINT, + headers=self.HEADERS, + data=json.dumps(data), + proxies={"http_proxy": os.environ["http_proxy"], "https_proxy": os.environ["https_proxy"]}, + ) + + return self.process_response(response) - if response.status_code == 200: - try: - # Check if the specific log path is in the response text - if "/logs/LLMChain/final_output" in response.text: - # Extract the relevant part of the response - temp = ast.literal_eval( - [ - i.split("data: ")[1] - for i in response.text.split("\n\n") - if "/logs/LLMChain/final_output" in i - ][0] - )["ops"] - - # Find the final output value - final_output = [i["value"] for i in temp if i["path"] == "/logs/LLMChain/final_output"][0] - return final_output["text"] - else: - # Perform string replacements to clean the response text - cleaned_text = response.text - replacements = [ - ("'\n\ndata: b'", ""), - ("data: b' ", ""), - ("'\n\ndata: [DONE]\n\n", ""), - ("\n\ndata: b", ""), - ("'\n\n", ""), - ("'\n", ""), - ('''\'"''', ""), - ] - for old, new in replacements: - cleaned_text = cleaned_text.replace(old, new) - return cleaned_text - except (IndexError, KeyError, ValueError) as e: - # Handle potential errors during parsing - logger.error("Error parsing response: %s", e) - return response.text - - except requests.exceptions.RequestException as e: - logger.error("Request exception: %s", e) - return str(e) + except requests.exceptions.RequestException as e: + logger.error("Request exception: %s", e) + return str(e) return str(response.status_code) - def create_upload_ui(self, label, file_types, process_function, document_type="text"): + def create_upload_ui(self, label, file_types, document_type="text"): """Create a Gradio UI for file uploads. Args: label (str): The label for the upload button. file_types (list): The list of accepted file types. - process_function (function): The function to process the uploaded file. + document_type (str): The document type (text, audio, or video). Default is text. Returns: gr.Blocks: The Gradio Blocks object representing the upload UI. @@ -214,7 +179,7 @@ def create_upload_ui(self, label, file_types, process_function, document_type="t label="Text Summary", placeholder="Summarized text will be displayed here" ) upload_btn.upload( - lambda file: self.generate_summary(process_function(file), document_type=document_type), + lambda file: self.generate_summary(file, document_type=document_type), upload_btn, generated_text, ) @@ -264,23 +229,20 @@ def render(self): # File Upload UI file_ui = self.create_upload_ui( label="Please upload a document (.pdf, .doc, .docx)", - file_types=[".pdf", ".doc", ".docx"], - process_function=self.read_file, + file_types=self.ACCEPTED_TEXT_FILE_TYPES ) # Audio Upload UI audio_ui = self.create_upload_ui( label="Please upload audio file (.wav, .mp3)", - file_types=[".wav", ".mp3"], - process_function=self.read_audio_file, + file_types=self.ACCEPTED_AUDIO_FILE_TYPES, document_type="audio", ) # Video Upload UI video_ui = self.create_upload_ui( label="Please upload Video file (.mp4)", - file_types=[".mp4"], - process_function=self.read_video_file, + file_types=self.ACCEPTED_VIDEO_FILE_TYPES, document_type="video", ) From bc09a633d7ee539c4f1a9a50a589b5b90cb93d3f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 May 2025 22:54:04 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- DocSum/docsum.py | 20 ++++++++++---------- DocSum/ui/gradio/docsum_ui_gradio.py | 3 +-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/DocSum/docsum.py b/DocSum/docsum.py index 1be5e322a7..786e48a264 100644 --- a/DocSum/docsum.py +++ b/DocSum/docsum.py @@ -64,17 +64,17 @@ def read_pdf(file): def encode_file_to_base64(file_path): - """Encode the content of a file to a base64 string. + """Encode the content of a file to a base64 string. - Args: - file_path (str): The path to the file to be encoded. + Args: + file_path (str): The path to the file to be encoded. - Returns: - str: The base64 encoded string of the file content. - """ - with open(file_path, "rb") as f: - base64_str = base64.b64encode(f.read()).decode("utf-8") - return base64_str + Returns: + str: The base64 encoded string of the file content. + """ + with open(file_path, "rb") as f: + base64_str = base64.b64encode(f.read()).decode("utf-8") + return base64_str def video2audio( @@ -217,7 +217,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File( docs = encode_file_to_base64(file_path) else: raise ValueError(f"Data type not recognized: {data_type}") - + os.remove(file_path) if isinstance(docs, list): diff --git a/DocSum/ui/gradio/docsum_ui_gradio.py b/DocSum/ui/gradio/docsum_ui_gradio.py index a477b31ff9..f354e269d4 100644 --- a/DocSum/ui/gradio/docsum_ui_gradio.py +++ b/DocSum/ui/gradio/docsum_ui_gradio.py @@ -228,8 +228,7 @@ def render(self): # File Upload UI file_ui = self.create_upload_ui( - label="Please upload a document (.pdf, .doc, .docx)", - file_types=self.ACCEPTED_TEXT_FILE_TYPES + label="Please upload a document (.pdf, .doc, .docx)", file_types=self.ACCEPTED_TEXT_FILE_TYPES ) # Audio Upload UI From ca57350c622437aee27b85a1136168077f3bbc31 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Thu, 1 May 2025 16:10:25 -0700 Subject: [PATCH 3/6] Improve display text of document types Signed-off-by: Melanie Buehler --- DocSum/ui/gradio/docsum_ui_gradio.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/DocSum/ui/gradio/docsum_ui_gradio.py b/DocSum/ui/gradio/docsum_ui_gradio.py index f354e269d4..5844a432b9 100644 --- a/DocSum/ui/gradio/docsum_ui_gradio.py +++ b/DocSum/ui/gradio/docsum_ui_gradio.py @@ -228,19 +228,20 @@ def render(self): # File Upload UI file_ui = self.create_upload_ui( - label="Please upload a document (.pdf, .doc, .docx)", file_types=self.ACCEPTED_TEXT_FILE_TYPES + label=f"Please upload a document ({', '.join(self.ACCEPTED_TEXT_FILE_TYPES)})", + file_types=self.ACCEPTED_TEXT_FILE_TYPES ) # Audio Upload UI audio_ui = self.create_upload_ui( - label="Please upload audio file (.wav, .mp3)", + label=f"Please upload audio file ({', '.join(self.ACCEPTED_AUDIO_FILE_TYPES)})", file_types=self.ACCEPTED_AUDIO_FILE_TYPES, document_type="audio", ) # Video Upload UI video_ui = self.create_upload_ui( - label="Please upload Video file (.mp4)", + label=f"Please upload video file ({', '.join(self.ACCEPTED_VIDEO_FILE_TYPES)})", file_types=self.ACCEPTED_VIDEO_FILE_TYPES, document_type="video", ) From caba592e93e06579ac620f1b0433236503054fce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 1 May 2025 23:15:26 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- DocSum/ui/gradio/docsum_ui_gradio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DocSum/ui/gradio/docsum_ui_gradio.py b/DocSum/ui/gradio/docsum_ui_gradio.py index 5844a432b9..8d8a440ce3 100644 --- a/DocSum/ui/gradio/docsum_ui_gradio.py +++ b/DocSum/ui/gradio/docsum_ui_gradio.py @@ -229,7 +229,7 @@ def render(self): # File Upload UI file_ui = self.create_upload_ui( label=f"Please upload a document ({', '.join(self.ACCEPTED_TEXT_FILE_TYPES)})", - file_types=self.ACCEPTED_TEXT_FILE_TYPES + file_types=self.ACCEPTED_TEXT_FILE_TYPES, ) # Audio Upload UI From 5a5384d91ee049ec4dc0af5fa70a17cb2a3eb616 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Mon, 5 May 2025 14:52:40 -0700 Subject: [PATCH 5/6] Updated docs and tests per feedback Signed-off-by: Melanie Buehler --- DocSum/docker_compose/amd/gpu/rocm/README.md | 4 +++ .../docker_compose/intel/cpu/xeon/README.md | 4 +++ .../docker_compose/intel/hpu/gaudi/README.md | 4 +++ DocSum/tests/test_compose_on_gaudi.sh | 28 +++++++++++++++++++ DocSum/tests/test_compose_on_xeon.sh | 28 +++++++++++++++++++ DocSum/tests/test_compose_tgi_on_gaudi.sh | 28 +++++++++++++++++++ DocSum/tests/test_compose_tgi_on_xeon.sh | 28 +++++++++++++++++++ 7 files changed, 124 insertions(+) diff --git a/DocSum/docker_compose/amd/gpu/rocm/README.md b/DocSum/docker_compose/amd/gpu/rocm/README.md index 3b027bb92c..da9d7d749f 100644 --- a/DocSum/docker_compose/amd/gpu/rocm/README.md +++ b/DocSum/docker_compose/amd/gpu/rocm/README.md @@ -239,8 +239,12 @@ curl http://${HOST_IP}:${DOCSUM_BACKEND_SERVER_PORT}/v1/docsum \ -F "language=en" \ ``` +Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs. + ### Query with audio and video +> Audio and video can be passed as base64 strings or uploaded by providing a local file path. + Audio: ```bash diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index b47bb292b5..06d3e4378d 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -159,8 +159,12 @@ curl http://${host_ip}:8888/v1/docsum \ -F "language=en" ``` +Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs. + ### Query with audio and video +> Audio and video can be passed as base64 strings or uploaded by providing a local file path. + Audio: ```bash diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 256bea262d..5cf9e77477 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -161,8 +161,12 @@ curl http://${host_ip}:8888/v1/docsum \ -F "language=en" \ ``` +Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs. + ### Query with audio and video +> Audio and video can be passed as base64 strings or uploaded by providing a local file path. + Audio: ```bash diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index aecdc006c7..fe3a3f7325 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -237,6 +237,20 @@ function validate_megaservice_multimedia() { "language=en" \ "stream=False" + echo ">>> Checking audio data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "well" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=audio" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.wav" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" + echo ">>> Checking video data in json format" validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ @@ -258,6 +272,20 @@ function validate_megaservice_multimedia() { "max_tokens=32" \ "language=en" \ "stream=False" + + echo ">>> Checking video data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "bye" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=video" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.mp4" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" } function validate_megaservice_long_text() { diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index 5ff7add6be..c231e7264e 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -237,6 +237,20 @@ function validate_megaservice_multimedia() { "language=en" \ "stream=False" + echo ">>> Checking audio data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "well" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=audio" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.wav" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" + echo ">>> Checking video data in json format" validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ @@ -258,6 +272,20 @@ function validate_megaservice_multimedia() { "max_tokens=32" \ "language=en" \ "stream=False" + + echo ">>> Checking video data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "bye" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=video" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.mp4" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" } function validate_megaservice_long_text() { diff --git a/DocSum/tests/test_compose_tgi_on_gaudi.sh b/DocSum/tests/test_compose_tgi_on_gaudi.sh index 6859e5354a..06dd9b7292 100644 --- a/DocSum/tests/test_compose_tgi_on_gaudi.sh +++ b/DocSum/tests/test_compose_tgi_on_gaudi.sh @@ -229,6 +229,20 @@ function validate_megaservice_multimedia() { "language=en" \ "stream=False" + echo ">>> Checking audio data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "well" \ + "docsum-gaudi-backend-server" \ + "docsum-gaudi-backend-server" \ + "media" "" \ + "type=audio" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.wav" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" + echo ">>> Checking video data in json format" validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ @@ -250,6 +264,20 @@ function validate_megaservice_multimedia() { "max_tokens=32" \ "language=en" \ "stream=False" + + echo ">>> Checking video data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "bye" \ + "docsum-gaudi-backend-server" \ + "docsum-gaudi-backend-server" \ + "media" "" \ + "type=video" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.mp4" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" } function validate_megaservice_long_text() { diff --git a/DocSum/tests/test_compose_tgi_on_xeon.sh b/DocSum/tests/test_compose_tgi_on_xeon.sh index f94eabf0c8..52edea31f8 100644 --- a/DocSum/tests/test_compose_tgi_on_xeon.sh +++ b/DocSum/tests/test_compose_tgi_on_xeon.sh @@ -229,6 +229,20 @@ function validate_megaservice_multimedia() { "language=en" \ "stream=False" + echo ">>> Checking audio data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "well" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=audio" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.wav" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" + echo ">>> Checking video data in json format" validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ @@ -250,6 +264,20 @@ function validate_megaservice_multimedia() { "max_tokens=32" \ "language=en" \ "stream=False" + + echo ">>> Checking video data in form format, upload file" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ + "bye" \ + "docsum-xeon-backend-server" \ + "docsum-xeon-backend-server" \ + "media" "" \ + "type=video" \ + "messages=" \ + "files=@$ROOT_FOLDER/data/test.mp4" \ + "max_tokens=32" \ + "language=en" \ + "stream=False" } function validate_megaservice_long_text() { From 79c2c0ed2bf0e894cc2c70ee904df57346091031 Mon Sep 17 00:00:00 2001 From: Melanie Buehler Date: Mon, 5 May 2025 15:11:39 -0700 Subject: [PATCH 6/6] Fix service name Signed-off-by: Melanie Buehler --- DocSum/tests/test_compose_on_gaudi.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index fe3a3f7325..3c0f3d695b 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -241,8 +241,8 @@ function validate_megaservice_multimedia() { validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ "well" \ - "docsum-xeon-backend-server" \ - "docsum-xeon-backend-server" \ + "docsum-gaudi-backend-server" \ + "docsum-gaudi-backend-server" \ "media" "" \ "type=audio" \ "messages=" \ @@ -277,8 +277,8 @@ function validate_megaservice_multimedia() { validate_service \ "${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" \ "bye" \ - "docsum-xeon-backend-server" \ - "docsum-xeon-backend-server" \ + "docsum-gaudi-backend-server" \ + "docsum-gaudi-backend-server" \ "media" "" \ "type=video" \ "messages=" \