Skip to content

Commit 805e066

Browse files
committed
Update table.py
documentary changes
1 parent 8f34325 commit 805e066

File tree

6 files changed

+681
-262
lines changed

6 files changed

+681
-262
lines changed

src/__init__.py

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import zipfile
2929

3030
from . import extra
31-
31+
import importlib.util
3232

3333
# Set up g_out_log and g_out_message from environment variables.
3434
#
@@ -333,6 +333,37 @@ def __init__(self):
333333

334334
_globals = _Globals()
335335

336+
_get_layout: typing.Optional[typing.Callable] = None
337+
338+
# global switch ensuring that the recommendation message is shown at most once
339+
_recommend_layout = True # must be referred to as "global" everywhere
340+
341+
342+
def no_recommend_layout():
    """Switch off the layout recommendation message.

    Clears the module-level flag ``_recommend_layout``. Intended for
    users who never want to see the recommendation. Returns None.
    """
    global _recommend_layout
    _recommend_layout = False
346+
347+
348+
def _warn_layout_once():
    """Print the layout-package recommendation if it is still appropriate.

    The message is printed only when every one of these holds:

    * the module-level flag ``_recommend_layout`` is still true,
    * no layout function is stored in the module-level ``_get_layout``,
    * environment variable ``PYMUPDF_SUGGEST_LAYOUT_ANALYZER`` is not "0",
    * module ``pymupdf.layout`` cannot be located in this Python.

    After printing, ``_recommend_layout`` is cleared so the message is not
    shown again. Returns None.
    """
    global _recommend_layout

    # Cheap checks first, in the same order as the original condition chain.
    if not _recommend_layout:
        return
    if _get_layout is not None:
        # a layout function is stored here - nothing to recommend
        return
    if os.getenv("PYMUPDF_SUGGEST_LAYOUT_ANALYZER") == "0":
        # client globally disabled the recommendation
        return

    # find_spec() imports the parent package of a dotted name and may raise;
    # a purely advisory message must never break the caller.
    try:
        layout_available = importlib.util.find_spec("pymupdf.layout") is not None
    except ImportError:
        # parent package could not be imported: layout is not available
        layout_available = False
    except ValueError:
        # the module is already loaded but has no usable __spec__
        layout_available = True
    if layout_available:
        return

    print("""Consider using the pymupdf_layout package for a greatly improved page layout analysis.""")
    _recommend_layout = False  # never show the message again
366+
336367

337368
# Optionally use MuPDF via cppyy bindings; experimental and not tested recently
338369
# as of 2023-01-20 11:51:40
@@ -9952,7 +9983,7 @@ def _get_resource_properties(self):
99529983
return rc
99539984

99549985
def _get_textpage(self, clip=None, flags=0, matrix=None):
9955-
if g_use_extra:
9986+
if 1 or g_use_extra:
99569987
ll_tpage = extra.page_get_textpage(self.this, clip, flags, matrix)
99579988
tpage = mupdf.FzStextPage(ll_tpage)
99589989
return tpage
@@ -10781,6 +10812,20 @@ def clip_to_rect(self, rect):
1078110812
pclip = JM_rect_from_py(clip)
1078210813
mupdf.pdf_clip_page(pdfpage, pclip)
1078310814

10815+
def get_layout(self):
    """Fill ``self.layout_information`` when it is still empty.

    Nothing happens if layout information is already present on this
    object, or if the module-level ``_get_layout`` is not set. Otherwise
    ``_get_layout(self)`` is called and its result is stored in
    ``self.layout_information``. Always returns None.
    """
    # Short-circuit keeps the original order: the stored value is checked
    # before the module-level layout function is looked at.
    if self.layout_information is None and _get_layout:
        self.layout_information = _get_layout(self)
10828+
1078410829
@property
1078510830
def artbox(self):
1078610831
"""The ArtBox"""
@@ -11432,7 +11477,7 @@ def get_cdrawings(self, extended=None, callback=None, method=None):
1143211477
assert isinstance(page, mupdf.FzPage), f'{self.this=}'
1143311478
clips = True if extended else False
1143411479
prect = mupdf.fz_bound_page(page)
11435-
if g_use_extra:
11480+
if 1 or g_use_extra:
1143611481
rc = extra.get_cdrawings(page, extended, callback, method)
1143711482
else:
1143811483
rc = list()
@@ -12157,7 +12202,7 @@ def get_texttrace(self):
1215712202
self.set_rotation(0)
1215812203
page = self.this
1215912204
rc = []
12160-
if g_use_extra:
12205+
if 1 or g_use_extra:
1216112206
dev = extra.JM_new_texttrace_device(rc)
1216212207
else:
1216312208
dev = JM_new_texttrace_device(rc)
@@ -13206,6 +13251,9 @@ def xref(self):
1320613251

1320713252
rect = property(bound, doc="page rectangle")
1320813253

13254+
# any result of layout analysis is stored here
13255+
layout_information = None
13256+
1320913257

1321013258
class Pixmap:
1321113259

@@ -16391,7 +16439,7 @@ def _textpage_dict(self, raw=False):
1639116439

1639216440
def extractBLOCKS(self):
1639316441
"""Return a list with text block information."""
16394-
if g_use_extra:
16442+
if 1 or g_use_extra:
1639516443
return extra.extractBLOCKS(self.this)
1639616444
block_n = -1
1639716445
this_tpage = self.this
@@ -16587,7 +16635,7 @@ def extractTextbox(self, rect):
1658716635

1658816636
def extractWORDS(self, delimiters=None):
1658916637
"""Return a list with text word information."""
16590-
if g_use_extra:
16638+
if 1 or g_use_extra:
1659116639
return extra.extractWORDS(self.this, delimiters)
1659216640
buflen = 0
1659316641
last_char_rtl = 0
@@ -18969,7 +19017,7 @@ def JM_color_FromSequence(color):
1896919017

1897019018

1897119019
def JM_color_count( pm, clip):
18972-
if g_use_extra:
19020+
if 1 or g_use_extra:
1897319021
return extra.ll_JM_color_count(pm.m_internal, clip)
1897419022

1897519023
rc = dict()
@@ -20469,7 +20517,7 @@ def JM_make_annot_DA(annot, ncol, col, fontname, fontsize):
2046920517

2047020518

2047120519
def JM_make_spanlist(line_dict, line, raw, buff, tp_rect):
20472-
if g_use_extra:
20520+
if 1 or g_use_extra:
2047320521
return extra.JM_make_spanlist(line_dict, line, raw, buff, tp_rect)
2047420522
char_list = None
2047520523
span_list = []
@@ -20682,7 +20730,7 @@ def JM_make_image_block(block, block_dict):
2068220730

2068320731

2068420732
def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
20685-
if g_use_extra:
20733+
if 1 or g_use_extra:
2068620734
return extra.JM_make_text_block(block.m_internal, block_dict, raw, buff.m_internal, tp_rect.m_internal)
2068720735
line_list = []
2068820736
block_rect = mupdf.FzRect(mupdf.FzRect.Fixed_EMPTY)
@@ -20705,7 +20753,7 @@ def JM_make_text_block(block, block_dict, raw, buff, tp_rect):
2070520753

2070620754

2070720755
def JM_make_textpage_dict(tp, page_dict, raw):
20708-
if g_use_extra:
20756+
if 1 or g_use_extra:
2070920757
return extra.JM_make_textpage_dict(tp.m_internal, page_dict, raw)
2071020758
text_buffer = mupdf.fz_new_buffer(128)
2071120759
block_list = []
@@ -21356,7 +21404,7 @@ def JM_rotate_page_matrix(page):
2135621404

2135721405

2135821406
def JM_search_stext_page(page, needle):
21359-
if g_use_extra:
21407+
if 1 or g_use_extra:
2136021408
return extra.JM_search_stext_page(page.m_internal, needle)
2136121409

2136221410
rect = mupdf.FzRect(page.m_internal.mediabox)

0 commit comments

Comments
 (0)