Skip to content

Rework / remove method reload_page #4602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 3 additions & 50 deletions src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5232,35 +5232,8 @@ def prev_location(self, page_id):

def reload_page(self, page: Page) -> Page:
"""Make a fresh copy of a page."""
old_annots = {} # copy annot references to here
pno = page.number # save the page number
for k, v in page._annot_refs.items(): # save the annot dictionary
old_annots[k] = v

# When we call `self.load_page()` below, it will end up in
# fz_load_chapter_page(), which will return any matching page in the
# document's list of non-ref-counted loaded pages, instead of actually
# reloading the page.
#
# We want to assert that we have actually reloaded the fz_page, and not
# simply returned the same `fz_page*` pointer from the document's list
# of non-ref-counted loaded pages.
#
# So we first remove our reference to the `fz_page*`. This will
# decrement .refs, and if .refs was 1, this is guaranteed to free the
# `fz_page*` and remove it from the document's list if it was there. So
# we are guaranteed that our returned `fz_page*` is from a genuine
# reload, even if it happens to reuse the original block of memory.
#
# However if the original .refs is greater than one, there must be
# other references to the `fz_page` somewhere, and we require that
# these other references are not keeping the page in the document's
# list. We check that we are returning a newly loaded page by
# asserting that our returned `fz_page*` is different from the original
# `fz_page*` - the original was not freed, so a new `fz_page` cannot
# reuse the same block of memory.
#

page.refresh() # synchronize links & annotations
refs_old = page.this.m_internal.refs
m_internal_old = page.this.m_internal_value()

Expand All @@ -5269,24 +5242,6 @@ def reload_page(self, page: Page) -> Page:
page = None
TOOLS.store_shrink(100)
page = self.load_page(pno) # reload the page

# copy annot refs over to the new dictionary
#page_proxy = weakref.proxy(page)
for k, v in old_annots.items():
annot = old_annots[k]
#annot.parent = page_proxy # refresh parent to new page
page._annot_refs[k] = annot
if refs_old == 1:
# We know that `page.this = None` will have decremented the ref
# count to zero so we are guaranteed that the new `fz_page` is a
# new page even if it happens to have reused the same block of
# memory.
pass
else:
# Check that the new `fz_page*` is different from the original.
m_internal_new = page.this.m_internal_value()
assert m_internal_new != m_internal_old, \
f'{refs_old=} {m_internal_old=:#x} {m_internal_new=:#x}'
return page

def resolve_link(self, uri=None, chapters=0):
Expand Down Expand Up @@ -9742,10 +9697,8 @@ def read_contents(self):
def refresh(self):
"""Refresh page after link/annot/widget updates."""
CheckParent(self)
doc = self.parent
page = doc.reload_page(self)
# fixme this looks wrong.
self.this = page
pdf_page = _as_pdf_page(self)
mupdf.pdf_sync_page(pdf_page)

@property
def rotation(self):
Expand Down
4 changes: 4 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2244,6 +2244,7 @@ def insert_link(page: pymupdf.Page, lnk: dict, mark: bool = True) -> None:
if annot == "":
raise ValueError("link kind not supported")
page._addAnnot_FromString((annot,))
page.refresh()


def insert_textbox(
Expand Down Expand Up @@ -2494,6 +2495,8 @@ def rect_function(*args):
link["from"] *= mat
page.insert_link(link)

# ensure any new links are available
page.refresh()
return spare_height, scale


Expand Down Expand Up @@ -4271,6 +4274,7 @@ def center_rect(annot_rect, new_text, font, fsize):
)
fsize -= 0.5 # reduce font if unsuccessful
shape.commit() # append new contents object
page.refresh() # synchronize links and annotations
return True


Expand Down
1 change: 0 additions & 1 deletion tests/test_annots.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ def test_stamp():
annot_xref = annot.xref
page.load_annot(annot_id)
page.load_annot(annot_xref)
page = doc.reload_page(page)


def test_image_stamp():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ def test_2957_1():
page.apply_redactions()

# reload page to finalize updates
page = doc.reload_page(page)
#page = doc.reload_page(page)

# the two string must retain their positions (except rounding errors)
rects1 = page.search_for("6e9f73dfb4384a2b8af6ebba")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_mupdf_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_707727():
page = doc[0]
pix0 = page.get_pixmap()
page.clean_contents(sanitize=True)
page = doc.reload_page(page) # required to prevent re-use
# page = doc.reload_page(page) # required to prevent re-use
pix1 = page.get_pixmap()
rms = gentle_compare.pixmaps_rms(pix0, pix1)
print(f'{rms=}', flush=1)
Expand Down
2 changes: 0 additions & 2 deletions tests/test_named_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ def test_2886():
# insert this link in an arbitrary page & rect
page = doc[-1]
page.insert_link(link)
# need this to update the internal MuPDF annotations array
page = doc.reload_page(page)

# our new link must be the last in the following list
links = page.get_links()
Expand Down
1 change: 0 additions & 1 deletion tests/test_textbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ def test_htmlbox1():
spare_height, scale = page.insert_htmlbox(rect, text, rotate=rot, scale_low=0)
assert spare_height == 0
assert 0 < scale < 1
page = doc.reload_page(page)
link = page.get_links()[0] # extracts the links on the page

assert link["uri"] == "https://www.artifex.com"
Expand Down
8 changes: 0 additions & 8 deletions tests/test_widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,25 +398,19 @@ def test_4055():
w.field_value = w.on_state()
w.update()

page = doc.reload_page(page) # reload page to make sure we start fresh

# Round 2: confirm that fields contain the PDF's own on values
for w in page.widgets(types=[2]):
# confirm each value coincides with the "Yes" value
assert w.field_value == w.on_state()
w.field_value = False # switch to "Off" using False
w.update()

page = doc.reload_page(page)

# Round 3: confirm that 'False' achieved "Off" values
for w in page.widgets(types=[2]):
assert w.field_value == "Off"
w.field_value = True # use True for the next round
w.update()

page = doc.reload_page(page)

# Round 4: confirm that setting to True also worked
for w in page.widgets(types=[2]):
assert w.field_value == w.on_state()
Expand All @@ -425,8 +419,6 @@ def test_4055():
w.field_value = "Yes"
w.update()

page = doc.reload_page(page)

# Round 5: final check: setting to "Yes" also does work
for w in page.widgets(types=[2]):
assert w.field_value == w.on_state()
Loading