33import pytest
44
55from mindee .inputs import Base64Document , BytesDocument , FileDocument , PathDocument
6+ from tests import INVOICE_DATA_DIR , RECEIPT_DATA_DIR
7+
#
# PDF
#
611
712
def test_pdf_reconstruct_fail():
    """Building a cut PathDocument with ``n_pdf_pages=4`` must raise AssertionError.

    NOTE(review): presumably 4 is outside the page count that PathDocument
    accepts when cutting -- confirm against the mindee.inputs implementation.
    """
    with pytest.raises(AssertionError):
        PathDocument(
            f"{INVOICE_DATA_DIR}/invoice_10p.pdf",
            cut_pdf=True,
            n_pdf_pages=4,
        )
1520
1621
def test_pdf_reconstruct_ok():
    """A PathDocument created with default options exposes an ``io.BytesIO`` file object."""
    input_file = PathDocument(f"{INVOICE_DATA_DIR}/invoice_10p.pdf")
    assert isinstance(input_file.file_object, io.BytesIO)
2025
2126
22- def test_read_contents ():
23- input_doc = PathDocument ("./tests/data/invoice /invoice.pdf" )
27+ def test_pdf_read_contents ():
28+ input_doc = PathDocument (f" { INVOICE_DATA_DIR } /invoice.pdf" )
2429 contents = input_doc .read_contents (close_file = False )
2530 assert contents [0 ] == "invoice.pdf"
2631 assert isinstance (contents [1 ], bytes )
@@ -31,27 +36,28 @@ def test_read_contents():
3136
3237
def test_pdf_reconstruct_no_cut():
    """With ``cut_pdf=False`` the 10-page invoice is left untouched.

    Checks that all 10 pages are counted and that the file object is an
    ``io.BufferedReader`` rather than an in-memory buffer.
    """
    input_file = PathDocument(
        f"{INVOICE_DATA_DIR}/invoice_10p.pdf",
        cut_pdf=False,
    )
    assert input_file.count_pdf_pages() == 10
    assert isinstance(input_file.file_object, io.BufferedReader)
3742
3843
3944def test_pdf_reconstruct_check_n_pages ():
4045 input_obj_3 = PathDocument (
41- "./tests/data/invoice /invoice_10p.pdf" ,
46+ f" { INVOICE_DATA_DIR } /invoice_10p.pdf" ,
4247 cut_pdf = True ,
4348 n_pdf_pages = 3 ,
4449 )
4550 input_obj_2 = PathDocument (
46- "./tests/data/invoice /invoice_10p.pdf" ,
51+ f" { INVOICE_DATA_DIR } /invoice_10p.pdf" ,
4752 cut_pdf = True ,
4853 n_pdf_pages = 2 ,
4954 )
5055 input_obj_1 = PathDocument (
51- "./tests/data/invoice /invoice_10p.pdf" ,
56+ f" { INVOICE_DATA_DIR } /invoice_10p.pdf" ,
5257 cut_pdf = True ,
5358 n_pdf_pages = 1 ,
5459 )
60+ assert input_obj_1 .file_mimetype == "application/pdf"
5561
5662 # re-initialize file pointer
5763 input_obj_3 .file_object .seek (0 )
@@ -63,40 +69,44 @@ def test_pdf_reconstruct_check_n_pages():
6369 assert input_obj_1 .count_pdf_pages () == 1
6470
6571
def test_pdf_input_from_path():
    """A PDF loaded from a filesystem path is typed as PDF and cut down to one page."""
    input_obj_1 = PathDocument(
        f"{INVOICE_DATA_DIR}/invoice_10p.pdf",
        cut_pdf=True,
        n_pdf_pages=1,
    )
    assert input_obj_1.file_mimetype == "application/pdf"
    assert input_obj_1.count_pdf_pages() == 1
7380
7481
def test_pdf_input_from_file():
    """A PDF loaded from an open binary file handle is typed as PDF and cut to one page."""
    with open(f"{INVOICE_DATA_DIR}/invoice_10p.pdf", "rb") as fp:
        input_obj_1 = FileDocument(fp, cut_pdf=True, n_pdf_pages=1)
        # Assertions stay inside the ``with`` so they remain valid even if the
        # document still reads from the underlying handle.
        assert input_obj_1.file_mimetype == "application/pdf"
        assert input_obj_1.count_pdf_pages() == 1
7987
8088
def test_pdf_input_from_base64():
    """A PDF supplied as the text content of ``invoice_10p.txt`` is typed as PDF and cut to one page.

    NOTE(review): the fixture is assumed to hold the base64 encoding of
    ``invoice_10p.pdf`` -- confirm against the test data directory.
    """
    with open(f"{INVOICE_DATA_DIR}/invoice_10p.txt", "rt") as fp:
        encoded = fp.read()

    input_obj_1 = Base64Document(
        encoded,
        filename="invoice_10p.pdf",
        cut_pdf=True,
        n_pdf_pages=1,
    )
    assert input_obj_1.file_mimetype == "application/pdf"
    assert input_obj_1.count_pdf_pages() == 1
9099
91100
def test_pdf_input_from_bytes():
    """A PDF supplied as raw bytes is typed as PDF and cut down to one page."""
    with open(f"{INVOICE_DATA_DIR}/invoice_10p.pdf", "rb") as fp:
        raw = fp.read()

    input_obj_1 = BytesDocument(
        raw,
        filename="invoice_10p.pdf",
        cut_pdf=True,
        n_pdf_pages=1,
    )
    assert input_obj_1.file_mimetype == "application/pdf"
    assert input_obj_1.count_pdf_pages() == 1
101111
102112
@@ -109,3 +119,33 @@ def test_pdf_blank_check():
109119
110120 input_not_blank = PathDocument ("./tests/data/pdfs/not_blank_image_only.pdf" )
111121 assert input_not_blank .count_pdf_pages () == 1
122+
123+
#
# Images
#
127+
128+
def test_tif_input_from_path():
    """Both the ``.tif`` and ``.tiff`` receipt fixtures are detected as ``image/tiff``."""
    for extension in ("tif", "tiff"):
        input_obj = PathDocument(
            f"{RECEIPT_DATA_DIR}/receipt.{extension}",
            cut_pdf=True,
            n_pdf_pages=1,
        )
        assert input_obj.file_mimetype == "image/tiff"
143+
144+
def test_heic_input_from_path():
    """The ``.heic`` receipt fixture is detected as ``image/heic``."""
    input_obj_1 = PathDocument(
        f"{RECEIPT_DATA_DIR}/receipt.heic",
        cut_pdf=True,
        n_pdf_pages=1,
    )
    assert input_obj_1.file_mimetype == "image/heic"
0 commit comments