Skip to content

Commit 413e470

Browse files
committed
✨ add receipt v4
1 parent 41f808b commit 413e470

31 files changed

+459
-159
lines changed

CHANGELOG.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,22 @@
11
# Mindee Python API Library Changelog
22

3+
## v3.0.0 (2022-10-31)
4+
### ¡Breaking Changes!
5+
* :sparkles: New PDF cut/merge system, allowing you to specify exactly which pages to use.
6+
* :recycle: PDF documents are no longer cut by default; use the `page_options` parameter in the `parse` method.
7+
* :sparkles: Documents (endpoints) are now versioned, providing better backward-compatible support.
8+
* :sparkles: Pass the document class instead of a string to specify how to `parse` input sources.
9+
* :recycle: Some methods and parameters were renamed for better clarity.
10+
* :sparkles: Results from Custom documents are now deserialized into objects, rather than `dict`.
11+
12+
### Changes
13+
* :sparkles: Add support for expense receipts V4.
14+
* :recycle: Minor improvements to geometry functions.
15+
16+
### Fixes
17+
* :bug: Make sure the user is specified when calling custom docs from the CLI.
18+
* :bug: Add default timeout of 120 seconds for endpoints.
19+
320
## v2.6.0 (2022-10-10)
421
### Fixes
522
* :bug: don't print "None" when filename is empty

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ mindee_client = Client(api_key="my-api-key")
3939

4040
# Load a file from disk
4141
input_doc = mindee_client.doc_from_path("/path/to/the/check.jpg")
42-
# Parse the document as an invoice by passing the documents.us.TypeBankCheckV1 type
42+
# Parse the document as a USA bank check by passing the documents.us.TypeBankCheckV1 type
4343
api_response = input_doc.parse(documents.us.TypeBankCheckV1)
4444

4545
# Print a brief summary of the parsed data
@@ -57,10 +57,11 @@ mindee_client = Client(api_key="my-api-key").add_endpoint(
5757
endpoint_name="wnine",
5858
)
5959

60-
# Load a file from disk and parse it
60+
# Load a file from disk and parse it.
61+
# The endpoint name must be specified since it can't be determined from the class.
6162
api_response = mindee_client.doc_from_path(
6263
"/path/to/the/w9.jpg"
63-
).parse(documents.TypeCustomV1, "wnine")
64+
).parse(documents.TypeCustomV1, endpoint_name="wnine")
6465

6566
# Print a brief summary of the parsed data
6667
print(api_response.document)

mindee/__main__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,25 +18,29 @@ class CommandConfig(Generic[TypeDoc]):
1818

1919

2020
DOCUMENTS: Dict[str, CommandConfig] = {
21+
"custom": CommandConfig(
22+
help="Custom document type from API builder",
23+
doc_class=documents.TypeCustomV1,
24+
),
2125
"invoice": CommandConfig(
2226
help="Invoice",
2327
doc_class=documents.TypeInvoiceV3,
2428
),
2529
"receipt": CommandConfig(
2630
help="Expense Receipt",
27-
doc_class=documents.TypeReceiptV3,
31+
doc_class=documents.TypeReceiptV4,
2832
),
2933
"passport": CommandConfig(
3034
help="Passport",
3135
doc_class=documents.TypePassportV1,
3236
),
3337
"financial": CommandConfig(
3438
help="Financial Document (receipt or invoice)",
35-
doc_class=documents.TypeFinancialDocument,
39+
doc_class=documents.TypeFinancialV1,
3640
),
37-
"custom": CommandConfig(
38-
help="Custom document type from API builder",
39-
doc_class=documents.TypeCustomV1,
41+
"us-check": CommandConfig(
42+
help="US Bank Check",
43+
doc_class=documents.us.TypeBankCheckV1,
4044
),
4145
}
4246

mindee/client.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
from mindee.documents.base import Document, TypeDocument
55
from mindee.documents.config import DocumentConfig, DocumentConfigDict
66
from mindee.documents.custom.custom_v1 import CustomV1
7-
from mindee.documents.financial_document import FinancialDocument
7+
from mindee.documents.financial.financial_v1 import FinancialV1
88
from mindee.documents.invoice.invoice_v3 import InvoiceV3
99
from mindee.documents.passport.passport_v1 import PassportV1
1010
from mindee.documents.receipt.receipt_v3 import ReceiptV3
11+
from mindee.documents.receipt.receipt_v4 import ReceiptV4
1112
from mindee.documents.us.bank_check.bank_check_v1 import BankCheckV1
1213
from mindee.endpoints import OTS_OWNER, CustomEndpoint, HTTPException, StandardEndpoint
1314
from mindee.input.page_options import PageOptions
@@ -22,7 +23,7 @@
2223
from mindee.response import PredictResponse
2324

2425

25-
def get_type_var_name(type_var) -> str:
26+
def get_bound_classname(type_var) -> str:
2627
"""Get the name of the bound class."""
2728
return type_var.__bound__.__name__
2829

@@ -62,8 +63,8 @@ def parse(
6263
Set to `False` if you need to access the file after this operation.
6364
:param page_options: PageOptions object for cutting multipage documents.
6465
"""
65-
if get_type_var_name(document_class) != CustomV1.__name__:
66-
endpoint_name = get_type_var_name(document_class)
66+
if get_bound_classname(document_class) != CustomV1.__name__:
67+
endpoint_name = get_bound_classname(document_class)
6768
elif endpoint_name is None:
6869
raise RuntimeError("document_type is required for CustomDocument")
6970

@@ -109,7 +110,7 @@ def _make_request(
109110
include_words: bool,
110111
close_file: bool,
111112
) -> PredictResponse[TypeDocument]:
112-
if get_type_var_name(document_class) != doc_config.document_class.__name__:
113+
if get_bound_classname(document_class) != doc_config.document_class.__name__:
113114
raise RuntimeError("Document class mismatch!")
114115

115116
response = doc_config.document_class.request(
@@ -181,9 +182,18 @@ def _init_default_endpoints(self) -> None:
181182
)
182183
],
183184
),
184-
(OTS_OWNER, FinancialDocument.__name__): DocumentConfig(
185+
(OTS_OWNER, ReceiptV4.__name__): DocumentConfig(
186+
document_type="receipt_v3",
187+
document_class=ReceiptV4,
188+
endpoints=[
189+
StandardEndpoint(
190+
url_name="expense_receipts", version="4", api_key=self.api_key
191+
)
192+
],
193+
),
194+
(OTS_OWNER, FinancialV1.__name__): DocumentConfig(
185195
document_type="financial_doc",
186-
document_class=FinancialDocument,
196+
document_class=FinancialV1,
187197
endpoints=[
188198
StandardEndpoint(
189199
url_name="invoices", version="3", api_key=self.api_key

mindee/documents/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from mindee.documents import us
22
from mindee.documents.custom.custom_v1 import TypeCustomV1
3-
from mindee.documents.financial_document import TypeFinancialDocument
3+
from mindee.documents.financial.financial_v1 import TypeFinancialV1
44
from mindee.documents.invoice.invoice_v3 import TypeInvoiceV3
55
from mindee.documents.passport.passport_v1 import TypePassportV1
66
from mindee.documents.receipt.receipt_v3 import TypeReceiptV3
7+
from mindee.documents.receipt.receipt_v4 import TypeReceiptV4

mindee/documents/base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import re
23
from typing import Any, Dict, List, Optional, TypeVar
34

45
from mindee.endpoints import Endpoint
@@ -21,7 +22,7 @@ def serialize_for_json(obj: Any) -> Any:
2122
class Document:
2223
type: str
2324
"""Document type"""
24-
checklist: dict = {}
25+
checklist: dict
2526
"""Validation checks for the document"""
2627
filepath: Optional[str] = None
2728
"""Path of the input document"""
@@ -45,6 +46,7 @@ def __init__(
4546
self.type = document_type
4647

4748
self._build_from_api_prediction(api_prediction, page_n=page_n)
49+
self.checklist = {}
4850
self._checklist()
4951
self._reconstruct()
5052

@@ -81,5 +83,11 @@ def all_checks(self) -> bool:
8183
"""Return status of all checks."""
8284
return all(self.checklist)
8385

86+
@staticmethod
87+
def clean_out_string(out_string: str) -> str:
88+
"""Clean up the string representation."""
89+
regexp = re.compile(r" \n")
90+
return regexp.sub("\n", out_string)
91+
8492

8593
TypeDocument = TypeVar("TypeDocument", bound=Document)

mindee/documents/custom/custom_v1.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,13 @@ def _build_from_api_prediction(
5454
self.fields[field_name] = ListField(prediction=field, page_n=page_n)
5555

5656
def __str__(self) -> str:
57-
custom_doc_str = (
58-
f"----- {self.type} -----\nFilename: {self.filename or ''}".rstrip() + "\n"
59-
)
57+
custom_doc_str = f"----- {self.type} -----\nFilename: {self.filename or ''}\n"
6058
for class_name, class_info in self.classifications.items():
6159
custom_doc_str += f"{class_name}: {class_info}\n"
6260
for field_name, field_info in self.fields.items():
6361
custom_doc_str += f"{field_name}: {field_info}\n"
6462
custom_doc_str += "----------------------"
65-
return custom_doc_str
63+
return self.clean_out_string(custom_doc_str)
6664

6765
def _checklist(self) -> None:
6866
pass

mindee/documents/financial/__init__.py

Whitespace-only changes.

mindee/documents/financial_document.py renamed to mindee/documents/financial/financial_v1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from mindee.fields.typed import TypedField
1515

1616

17-
class FinancialDocument(Document):
17+
class FinancialV1(Document):
1818
locale: LocaleField
1919
"""locale information"""
2020
total_incl: AmountField
@@ -125,9 +125,9 @@ def _build_from_api_prediction(
125125
self.customer_address = TextField({"value": None, "confidence": 0.0})
126126

127127
def __str__(self) -> str:
128-
return (
128+
return self.clean_out_string(
129129
"-----Financial Document data-----\n"
130-
f"Filename: {self.filename or ''}".rstrip() + "\n"
130+
f"Filename: {self.filename or ''}\n"
131131
f"Invoice number: {self.invoice_number.value}\n"
132132
f"Total amount including taxes: {self.total_incl.value}\n"
133133
f"Total amount excluding taxes: {self.total_excl.value}\n"
@@ -216,4 +216,4 @@ def __taxes_match_total_incl(self) -> bool:
216216
return False
217217

218218

219-
TypeFinancialDocument = TypeVar("TypeFinancialDocument", bound=FinancialDocument)
219+
TypeFinancialV1 = TypeVar("TypeFinancialV1", bound=FinancialV1)

mindee/documents/invoice/invoice_v3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,9 @@ def __str__(self) -> str:
134134
[str(p) for p in self.payment_details]
135135
)
136136
taxes = "\n ".join(f"{t}" for t in self.taxes)
137-
return (
137+
return self.clean_out_string(
138138
"-----Invoice data-----\n"
139-
f"Filename: {self.filename or ''}".rstrip() + "\n"
139+
f"Filename: {self.filename or ''}\n"
140140
f"Invoice number: {self.invoice_number}\n"
141141
f"Total amount including taxes: {self.total_incl}\n"
142142
f"Total amount excluding taxes: {self.total_excl}\n"

0 commit comments

Comments
 (0)