Skip to content

Commit fab0c9f

Browse files
authored Dec 18, 2024
Merge pull request #118 from alexander-densley/main
add density and height to python as well
2 parents 581756b + a58b7f7 commit fab0c9f

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed
 

‎py_zerox/pyzerox/core/zerox.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import aiofiles
88
import aiofiles.os as async_os
99
import asyncio
10+
from ..constants import PDFConversionDefaultOptions
1011

1112
# Package Imports
1213
from ..processor import (
@@ -26,6 +27,8 @@ async def zerox(
2627
cleanup: bool = True,
2728
concurrency: int = 10,
2829
file_path: Optional[str] = "",
30+
image_density: int = PDFConversionDefaultOptions.DPI,
31+
image_height: tuple[Optional[int], int] = PDFConversionDefaultOptions.SIZE,
2932
maintain_format: bool = False,
3033
model: str = "gpt-4o-mini",
3134
output_dir: Optional[str] = None,
@@ -130,7 +133,7 @@ async def zerox(
130133
**subset_pdf_create_kwargs)
131134

132135
# Convert the file to a series of images, below function returns a list of image paths in page order
133-
images = await convert_pdf_to_images(local_path=local_path, temp_dir=temp_directory)
136+
images = await convert_pdf_to_images(image_density=image_density, image_height=image_height, local_path=local_path, temp_dir=temp_directory)
134137

135138
if maintain_format:
136139
for image in images:

‎py_zerox/pyzerox/processor/pdf.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,14 @@
1111
from ..models import litellmmodel
1212

1313

14-
async def convert_pdf_to_images(local_path: str, temp_dir: str) -> List[str]:
14+
async def convert_pdf_to_images(image_density: int, image_height: tuple[Optional[int], int], local_path: str, temp_dir: str) -> List[str]:
1515
"""Converts a PDF file to a series of images in the temp_dir. Returns a list of image paths in page order."""
1616
options = {
1717
"pdf_path": local_path,
1818
"output_folder": temp_dir,
19-
"dpi": PDFConversionDefaultOptions.DPI,
19+
"dpi": image_density,
2020
"fmt": PDFConversionDefaultOptions.FORMAT,
21-
"size": PDFConversionDefaultOptions.SIZE,
21+
"size": image_height,
2222
"thread_count": PDFConversionDefaultOptions.THREAD_COUNT,
2323
"use_pdftocairo": PDFConversionDefaultOptions.USE_PDFTOCAIRO,
2424
"paths_only": True,

0 commit comments

Comments
 (0)
Please sign in to comment.