Skip to content

Commit a55c622

Browse files
authored
♻️ harmonize getting page count from a local input source (#212)
1 parent 873dffa commit a55c622

File tree

4 files changed

+66
-29
lines changed

4 files changed

+66
-29
lines changed

lib/mindee/input/sources/local_input_source.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,22 @@ def write_to_file(path)
141141
# Returns the page count for a document.
142142
# Defaults to one for images.
143143
# @return [Integer]
144-
def count_pages
144+
def page_count
145145
return 1 unless pdf?
146146

147147
@io_stream.seek(0)
148148
pdf_processor = Mindee::PDF::PDFProcessor.open_pdf(@io_stream)
149149
pdf_processor.pages.size
150150
end
151151

152+
# Returns the page count for a document.
153+
# Defaults to one for images.
154+
# @return [Integer]
155+
# @deprecated Use {#page_count} instead.
156+
def count_pages
157+
page_count
158+
end
159+
152160
# Compresses the file, according to the provided info.
153161
# @param [Integer] quality Quality of the output file.
154162
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).

sig/mindee/input/sources/local_input_source.rbs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ module Mindee
1414

1515
def logger: () -> Logger
1616

17-
1817
def rescue_broken_pdf: (untyped) -> void
1918
def pdf?: -> bool
2019
def apply_page_options: (PageOptions) -> StringIO?
2120
def process_pdf: (PageOptions) -> StringIO?
2221
def read_contents: (?close: bool) -> [String?, Hash[:filename, String]]
22+
def page_count: -> Integer
2323
def count_pages: -> Integer
2424
def write_to_file: (String?) -> void
2525
def compress!: (?quality: Integer, ?max_width: Integer?, ?max_height: Integer?, ?force_source_text: bool, ?disable_source_text: bool) -> Integer

spec/extraction/invoice_splitter_extraction_integration.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def prepare_invoice_return(rst_file_path, invoice_prediction)
4545
invoice0.document
4646
)
4747

48-
expect(Mindee::TestUtilities.levenshtein_ratio(invoice0.document.to_s,
49-
test_string_rst_invoice0.chomp)).to be >= 0.97
48+
ratio = Mindee::TestUtilities.levenshtein_ratio(invoice0.document.to_s, test_string_rst_invoice0.chomp)
49+
expect(ratio).to be >= 0.90
5050
end
5151
end

spec/input/sources/sources_spec.rb

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,52 +10,79 @@
1010
describe Mindee::Input::Source do
1111
context 'An image input file' do
1212
it 'should load a JPEG from a path' do
13-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'file_types/receipt.jpg'))
14-
expect(input.file_mimetype).to eq('image/jpeg')
13+
input_source = Mindee::Input::Source::PathInputSource.new(
14+
File.join(DATA_DIR, 'file_types/receipt.jpg')
15+
)
16+
expect(input_source.file_mimetype).to eq('image/jpeg')
17+
expect(input_source.page_count).to eq(1)
18+
expect(input_source.pdf?).to eq(false)
1519

16-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'file_types/receipt.jpga'))
17-
expect(input.file_mimetype).to eq('image/jpeg')
20+
input_source = Mindee::Input::Source::PathInputSource.new(
21+
File.join(DATA_DIR, 'file_types/receipt.jpga')
22+
)
23+
expect(input_source.file_mimetype).to eq('image/jpeg')
24+
expect(input_source.page_count).to eq(1)
25+
expect(input_source.pdf?).to eq(false)
1826
end
1927

2028
it 'should load a TIFF from a path' do
21-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'file_types/receipt.tif'))
22-
expect(input.file_mimetype).to eq('image/tiff')
29+
input_source = Mindee::Input::Source::PathInputSource.new(
30+
File.join(DATA_DIR, 'file_types/receipt.tif')
31+
)
32+
expect(input_source.file_mimetype).to eq('image/tiff')
33+
expect(input_source.page_count).to eq(1)
34+
expect(input_source.pdf?).to eq(false)
2335

24-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'file_types/receipt.tiff'))
25-
expect(input.file_mimetype).to eq('image/tiff')
36+
input_source = Mindee::Input::Source::PathInputSource.new(
37+
File.join(DATA_DIR, 'file_types/receipt.tiff')
38+
)
39+
expect(input_source.file_mimetype).to eq('image/tiff')
40+
expect(input_source.page_count).to eq(1)
41+
expect(input_source.pdf?).to eq(false)
2642
end
2743

2844
it 'should load a HEIC from a path' do
29-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'file_types/receipt.heic'))
30-
expect(input.file_mimetype).to eq('image/heic')
45+
input_source = Mindee::Input::Source::PathInputSource.new(
46+
File.join(DATA_DIR, 'file_types/receipt.heic')
47+
)
48+
expect(input_source.file_mimetype).to eq('image/heic')
49+
expect(input_source.page_count).to eq(1)
50+
expect(input_source.pdf?).to eq(false)
3151
end
3252
end
3353

3454
context 'A PDF input file' do
3555
it 'should load a multi-page PDF from a path' do
36-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'products/invoices/invoice.pdf'))
37-
expect(input.file_mimetype).to eq('application/pdf')
38-
expect(input.pdf?).to eq(true)
56+
input_source = Mindee::Input::Source::PathInputSource.new(
57+
File.join(DATA_DIR, 'products/invoices/invoice.pdf')
58+
)
59+
expect(input_source.file_mimetype).to eq('application/pdf')
60+
expect(input_source.page_count).to eq(2)
61+
expect(input_source.pdf?).to eq(true)
3962

40-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'products/invoices/invoice.pdf'))
41-
expect(input.file_mimetype).to eq('application/pdf')
42-
expect(input.pdf?).to eq(true)
63+
input_source = Mindee::Input::Source::PathInputSource.new(
64+
File.join(DATA_DIR, 'products/invoices/invoice.pdf')
65+
)
66+
expect(input_source.file_mimetype).to eq('application/pdf')
67+
expect(input_source.page_count).to eq(2)
68+
expect(input_source.pdf?).to eq(true)
4369

44-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'products/invoices/invoice_10p.pdf'))
45-
expect(input.file_mimetype).to eq('application/pdf')
46-
expect(input.pdf?).to eq(true)
47-
48-
input = Mindee::Input::Source::PathInputSource.new(File.join(DATA_DIR, 'products/invoices/invoice_10p.pdf'))
49-
expect(input.file_mimetype).to eq('application/pdf')
50-
expect(input.pdf?).to eq(true)
70+
input_source = Mindee::Input::Source::PathInputSource.new(
71+
File.join(DATA_DIR, 'products/invoices/invoice_10p.pdf')
72+
)
73+
expect(input_source.file_mimetype).to eq('application/pdf')
74+
expect(input_source.page_count).to eq(10)
75+
expect(input_source.pdf?).to eq(true)
5176
end
5277
end
5378

5479
context 'A broken fixable PDF' do
5580
mindee_client = Mindee::Client.new(api_key: 'invalid-api-key')
5681
it 'Should not raise a mime error' do
5782
expect do
58-
mindee_client.source_from_path("#{DATA_DIR}/file_types/pdf/broken_fixable.pdf", repair_pdf: true)
83+
mindee_client.source_from_path(
84+
"#{DATA_DIR}/file_types/pdf/broken_fixable.pdf", repair_pdf: true
85+
)
5986
end.not_to raise_error
6087
end
6188
end
@@ -64,7 +91,9 @@
6491
mindee_client = Mindee::Client.new(api_key: 'invalid-api-key')
6592
it 'Should raise an error' do
6693
expect do
67-
mindee_client.source_from_path("#{DATA_DIR}/file_types/pdf/broken_unfixable.pdf", repair_pdf: true)
94+
mindee_client.source_from_path(
95+
"#{DATA_DIR}/file_types/pdf/broken_unfixable.pdf", repair_pdf: true
96+
)
6897
end.to raise_error Mindee::Errors::MindeePDFError
6998
end
7099
end

0 commit comments

Comments
 (0)