2828import zipfile
2929
3030from . import extra
31- 
31+ import importlib.util 
3232
3333# Set up g_out_log and g_out_message from environment variables.
3434#
@@ -333,6 +333,37 @@ def __init__(self):
333333
334334_globals = _Globals()
335335
# Optional layout-analysis hook. While this is None, no layout function is
# registered. When set, get_layout() calls it with the page object and stores
# the returned value in the page's "layout_information" attribute.
_get_layout: typing.Optional[typing.Callable] = None

# One-shot switch for the "consider installing the layout package" hint.
# True  -> the hint may still be printed;
# False -> it was already printed or was disabled via no_recommend_layout().
# Functions that assign to it must declare it with a "global" statement.
_recommend_layout = True
340+ 
341+ 
def no_recommend_layout() -> None:
    """Switch off the layout-package recommendation for this process.

    Sets the module-level flag ``_recommend_layout`` to ``False``. The
    recommendation is only printed while that flag is true, so after this
    call it is not shown any more. Calling it repeatedly is harmless.
    """
    # Rebind the module-level switch, not a function-local name.
    global _recommend_layout
    _recommend_layout = False
346+ 
347+ 
def _warn_layout_once():
    """Print a one-time hint recommending the pymupdf_layout package.

    The hint is printed only if all of the following hold:

    * the module-level switch ``_recommend_layout`` is still true, i.e. the
      hint was neither shown before nor disabled,
    * no layout function is registered (``_get_layout`` is ``None``),
    * environment variable ``PYMUPDF_SUGGEST_LAYOUT_ANALYZER`` is not "0",
    * module ``pymupdf.layout`` cannot be located in this Python.

    The function is purely advisory: it returns ``None``, and a failing
    module lookup is treated as "layout not available" instead of being
    propagated to the caller.
    """
    msg="""Consider using the pymupdf_layout package for a greatly improved page layout analysis."""

    global _recommend_layout

    # Cheap checks first: already shown / disabled, or a layout function
    # has been registered.
    if not _recommend_layout or _get_layout is not None:
        return

    # Client disabled the recommendation via the environment.
    if os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") == "0":
        return

    # find_spec() imports the parent package of a dotted name and may raise
    # ModuleNotFoundError (an ImportError) if that fails, or ValueError if a
    # module in sys.modules has no usable __spec__. Neither should escape
    # from a mere recommendation.
    try:
        layout_found = importlib.util.find_spec("pymupdf.layout") is not None
    except (ImportError, ValueError):
        layout_found = False

    # The lookup has been done: either the module exists (nothing to
    # recommend) or the hint is printed now. Clearing the switch in both
    # cases avoids repeating the import-system search on every call.
    _recommend_layout = False

    if not layout_found:
        print(msg)
366+ 
336367
337368# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
338369# as of 2023-01-20 11:51:40
@@ -9952,7 +9983,7 @@ def _get_resource_properties(self):
99529983        return rc
99539984
99549985    def _get_textpage(self, clip=None, flags=0, matrix=None):
9955-         if g_use_extra:
9986+         if 1 or  g_use_extra:
99569987            ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
99579988            tpage = mupdf.FzStextPage(ll_tpage)
99589989            return tpage
@@ -10781,6 +10812,20 @@ def clip_to_rect(self, rect):
1078110812        pclip = JM_rect_from_py(clip)
1078210813        mupdf.pdf_clip_page(pdfpage, pclip)
1078310814
def get_layout(self):
    """Populate ``self.layout_information`` if that is possible.

    Nothing happens when ``self.layout_information`` is already set (not
    ``None``) or when no layout function is registered in the module-level
    ``_get_layout``. Otherwise ``_get_layout`` is called with this object
    and its result is stored in ``self.layout_information``.

    Always returns ``None``.
    """
    # Skip if a result is already stored, or there is no function to call.
    if self.layout_information is not None or not _get_layout:
        return

    self.layout_information = _get_layout(self)
10828+ 
1078410829    @property
1078510830    def artbox(self):
1078610831        """The ArtBox"""
@@ -11432,7 +11477,7 @@ def get_cdrawings(self, extended=None, callback=None, method=None):
1143211477        assert isinstance(page, mupdf.FzPage), f'{self.this=}'
1143311478        clips = True if extended else False
1143411479        prect = mupdf.fz_bound_page(page)
11435-         if g_use_extra:
11480+         if 1 or  g_use_extra:
1143611481            rc = extra.get_cdrawings(page, extended, callback, method)
1143711482        else:
1143811483            rc = list()
@@ -12157,7 +12202,7 @@ def get_texttrace(self):
1215712202            self.set_rotation(0)
1215812203        page = self.this
1215912204        rc = []
12160-         if g_use_extra:
12205+         if 1 or  g_use_extra:
1216112206            dev = extra.JM_new_texttrace_device(rc)
1216212207        else:
1216312208            dev = JM_new_texttrace_device(rc)
@@ -13206,6 +13251,9 @@ def xref(self):
1320613251
1320713252    rect = property(bound, doc="page rectangle")
1320813253
13254+     # any result of layout analysis is stored here
13255+     layout_information = None
13256+ 
1320913257
1321013258class Pixmap:
1321113259
@@ -16391,7 +16439,7 @@ def _textpage_dict(self, raw=False):
1639116439
1639216440    def extractBLOCKS(self):
1639316441        """Return a list with text block information."""
16394-         if g_use_extra:
16442+         if 1 or  g_use_extra:
1639516443            return extra.extractBLOCKS(self.this)
1639616444        block_n = -1
1639716445        this_tpage = self.this
@@ -16587,7 +16635,7 @@ def extractTextbox(self, rect):
1658716635
1658816636    def extractWORDS(self, delimiters=None):
1658916637        """Return a list with text word information."""
16590-         if g_use_extra:
16638+         if 1 or  g_use_extra:
1659116639            return extra.extractWORDS(self.this, delimiters)
1659216640        buflen = 0
1659316641        last_char_rtl = 0
@@ -18969,7 +19017,7 @@ def JM_color_FromSequence(color):
1896919017
1897019018
1897119019def JM_color_count( pm, clip):
18972-     if g_use_extra:
19020+     if 1 or  g_use_extra:
1897319021        return extra.ll_JM_color_count(pm.m_internal, clip)
1897419022
1897519023    rc = dict()
@@ -20469,7 +20517,7 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
2046920517
2047020518
2047120519def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
20472-     if g_use_extra:
20520+     if 1 or  g_use_extra:
2047320521        return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
2047420522    char_list = None
2047520523    span_list = []
@@ -20682,7 +20730,7 @@ def JM_make_image_block(block, block_dict):
2068220730
2068320731
2068420732def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
20685-     if g_use_extra:
20733+     if 1 or  g_use_extra:
2068620734        return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
2068720735    line_list = []
2068820736    block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
@@ -20705,7 +20753,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
2070520753
2070620754
2070720755def JM_make_textpage_dict(tp, page_dict, raw):
20708-     if g_use_extra:
20756+     if 1 or  g_use_extra:
2070920757        return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
2071020758    text_buffer = mupdf.fz_new_buffer(128)
2071120759    block_list = []
@@ -21356,7 +21404,7 @@ def JM_rotate_page_matrix(page):
2135621404
2135721405
2135821406def JM_search_stext_page(page, needle):
21359-     if g_use_extra:
21407+     if 1 or  g_use_extra:
2136021408        return extra.JM_search_stext_page(page.m_internal, needle)
2136121409
2136221410    rect = mupdf.FzRect(page.m_internal.mediabox)
0 commit comments