|
15 | 15 | from unstructured_client import UnstructuredClient
|
16 | 16 | from unstructured_client.models import shared, operations
|
17 | 17 | from unstructured_client.models.errors import HTTPValidationError
|
| 18 | +from unstructured_client.models.shared.partition_parameters import OutputFormat |
18 | 19 | from unstructured_client.utils.retries import BackoffStrategy, RetryConfig
|
19 | 20 | from unstructured_client._hooks.custom import form_utils
|
20 | 21 | from unstructured_client._hooks.custom import split_pdf_hook
|
@@ -458,3 +459,37 @@ async def mock_send(_, request: httpx.Request, **kwargs):
|
458 | 459 | assert mock_endpoint_called
|
459 | 460 |
|
460 | 461 | assert res.status_code == 200
|
| 462 | + |
| 463 | + |
@pytest.mark.parametrize("split_pdf_page", [True, False])
def test_integration_split_csv_response(split_pdf_page, doc_path):
    """Check that a partition request for CSV output returns raw CSV text.

    The test talks to an API expected at 127.0.0.1:8000 and fails up front
    with an explicit message when that server cannot be reached. It runs once
    with ``split_pdf_page`` enabled and once with it disabled; the same
    assertions are applied to the response in both cases.

    Args:
        split_pdf_page: Value forwarded to ``PartitionParameters.split_pdf_page``.
        doc_path: Fixture providing the directory that holds the sample PDF.
    """
    # Probe the server first so an unavailable API yields a clear failure
    # instead of an opaque error from the client call further down.
    try:
        # A bounded timeout keeps the run from hanging forever when something
        # accepts the connection but never answers.
        response = requests.get("http://127.0.0.1:8000/general/docs", timeout=10)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        pytest.fail("The unstructured-api is not running on 127.0.0.1:8000")
    assert response.status_code == 200

    client = UnstructuredClient(api_key_auth="", server_url="127.0.0.1:8000")
    filename = "layout-parser-paper.pdf"
    with open(doc_path / filename, "rb") as f:
        files = shared.Files(
            content=f.read(),
            file_name=filename,
        )

    req = operations.PartitionRequest(
        partition_parameters=shared.PartitionParameters(
            files=files,
            output_format=OutputFormat.TEXT_CSV,
            split_pdf_page=split_pdf_page,
        )
    )

    resp = client.general.partition(request=req)

    assert resp.status_code == 200
    assert resp.content_type == "text/csv; charset=utf-8"
    # CSV output is expected on csv_elements, with elements left unset.
    assert resp.elements is None
    assert resp.csv_elements is not None
    assert resp.csv_elements.startswith(
        "type,element_id,text,filetype,languages,page_number,filename,parent_id"
    )
0 commit comments