1111
1212@pytest .mark .filename ("arxiv_test.pdf" )
1313@pytest .mark .output_format ("markdown" )
14+ @pytest .mark .config ({"disable_ocr" : True })
1415def test_pdf_links (pdf_document : Document , config , renderer , model_dict , temp_doc ):
1516 first_page = pdf_document .pages [1 ]
1617
@@ -19,26 +20,35 @@ def test_pdf_links(pdf_document: Document, config, renderer, model_dict, temp_do
1920 artifact_dict = model_dict ,
2021 processor_list = processors ,
2122 renderer = classes_to_strings ([renderer ])[0 ],
22- config = config
23+ config = config ,
2324 )
2425
25- for section_header_span in first_page .contained_blocks (pdf_document , (BlockTypes .Span ,)):
26+ for section_header_span in first_page .contained_blocks (
27+ pdf_document , (BlockTypes .Span ,)
28+ ):
2629 if "II." in section_header_span .text :
2730 assert section_header_span .url == "#page-1-0"
2831 break
2932 else :
3033 raise ValueError ("Could not find II. in the first page" )
3134
32- section_header_block = first_page .contained_blocks (pdf_document , (BlockTypes .SectionHeader ,))[0 ]
33- assert section_header_block .raw_text (pdf_document ) == 'II. THEORETICAL FRAMEWORK\n '
35+ section_header_block = first_page .contained_blocks (
36+ pdf_document , (BlockTypes .SectionHeader ,)
37+ )[0 ]
38+ assert section_header_block .raw_text (pdf_document ) == "II. THEORETICAL FRAMEWORK\n "
3439
3540 assert first_page .refs [0 ].ref == "page-1-0"
3641
3742 markdown_output : MarkdownOutput = pdf_converter (temp_doc .name )
3843 markdown = markdown_output .markdown
3944
40- assert ' [II.](#page-1-0)' in markdown
45+ assert " [II.](#page-1-0)" in markdown
4146 assert '<span id="page-1-0"></span>II. THEORETICAL FRAMEWORK' in markdown
4247
43- for ref in set ([f'<span id="page-{ m [0 ]} -{ m [1 ]} ">' for m in re .findall (r'\]\(#page-(\d+)-(\d+)\)' , markdown )]):
48+ for ref in set (
49+ [
50+ f'<span id="page-{ m [0 ]} -{ m [1 ]} ">'
51+ for m in re .findall (r"\]\(#page-(\d+)-(\d+)\)" , markdown )
52+ ]
53+ ):
4454 assert ref in markdown , f"Reference { ref } not found in markdown"
0 commit comments