Skip to content

Commit

Permalink
Merge pull request #39 from Filimoa/fitz-memory-leak
Browse files Browse the repository at this point in the history
[Memory Leak Fix] Create Fitz Pdf From Bytestream
  • Loading branch information
Filimoa authored Apr 28, 2024
2 parents 56b5a88 + 4aac742 commit 7e3fdd8
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "openparse"
description = "Streamlines the process of preparing documents for LLM's."
readme = "README.md"
requires-python = ">=3.8"
-version = "0.5.4"
+version = "0.5.5"
authors = [{name = "Sergey Filimonov", email = "[email protected]"}]
dependencies = [
"PyMuPDF >= 1.23.2",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions src/openparse/doc_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from openparse import tables, text, consts
from openparse.pdf import Pdf
-from openparse.types import NOT_GIVEN, NotGiven
+from openparse._types import NOT_GIVEN, NotGiven
from openparse.processing import (
IngestionPipeline,
BasicIngestionPipeline,
Expand Down Expand Up @@ -34,7 +34,7 @@ class PyMuPDFArgsDict(TypedDict, total=False):


def _table_args_dict_to_model(
-args_dict: Union[TableTransformersArgsDict, PyMuPDFArgsDict]
+args_dict: Union[TableTransformersArgsDict, PyMuPDFArgsDict],
) -> Union[tables.TableTransformersArgs, tables.PyMuPDFArgs]:
if args_dict["parsing_algorithm"] == "table-transformers":
return tables.TableTransformersArgs(**args_dict)
Expand Down
10 changes: 5 additions & 5 deletions src/openparse/pdf.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import random
-import tempfile
+import io
from pathlib import Path
from typing import Iterator, List, Literal, Optional, Union, Tuple, Any
-from pydantic import BaseModel

+from pydantic import BaseModel
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTPage
from pypdf import PdfReader, PdfWriter
Expand Down Expand Up @@ -115,9 +115,9 @@ def to_pymupdf_doc(self):
if not self.writer.pages:
return fitz.open(self.file_path)

-with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
-self.writer.write(tmpfile.name)
-return fitz.open(tmpfile.name)
+byte_stream = io.BytesIO()
+self.writer.write(byte_stream)
+return fitz.open(None, byte_stream)

def _draw_bboxes(
self,
Expand Down
2 changes: 1 addition & 1 deletion src/openparse/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-OPEN_PARSE_VERSION = "0.5.4"
+OPEN_PARSE_VERSION = "0.5.5"


def version_info() -> str:
Expand Down

0 comments on commit 7e3fdd8

Please sign in to comment.