Skip to content

Commit 45ebd61

Browse files
Invoice2.1 (#8)
* chg 🔖 prepare V1.2.0 * chg: ✨ updated SDK to new Mindee API * chg: ✅ Updated tests * chg: ➖ Deleted Numpy dependency * chg: 🙈 added DS_Store to gitignore * chg: 🙈 deleted DS_Store
1 parent 110f397 commit 45ebd61

30 files changed

+1865
-999
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,6 @@ dmypy.json
128128
# Pyre type checker
129129
.pyre/
130130
/data/
131+
132+
# Mac OS
133+
.DS_Store

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Mindee python SDK
22

3+
## v1.2.0 (2020-08-25)
4+
5+
### Chg
6+
7+
* :sparkles: Adapted SDK to the new Mindee API endpoint
8+
* :zap: Single page object reconstruction is now server-side
9+
* :heavy_minus_sign: Removed Numpy dependency
10+
* :white_check_mark: Updated tests with new data
11+
312
## v1.1.3 (2020-02-21)
413

514
### Fix

mindee/__init__.py

Lines changed: 31 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@
1010
from mindee.documents.passport import Passport
1111
from mindee.benchmark import Benchmark
1212

13+
DOCUMENT_CLASSES = {
14+
"receipt": Receipt,
15+
"invoice": Invoice,
16+
"financial_document": FinancialDocument,
17+
"passport": Passport,
18+
"license_plate": CarPlate
19+
}
20+
1321

1422
class Client(object):
1523
def __init__(
@@ -29,7 +37,7 @@ def __init__(
2937
"""
3038
assert type(raise_on_error) == bool
3139
self.raise_on_error = raise_on_error
32-
self.base_url = "https://api.mindee.net/products/"
40+
self.base_url = "https://api.mindee.net/v1/products/mindee/"
3341
self.expense_receipt_token = expense_receipt_token
3442
self.invoice_token = invoice_token
3543
self.passport_token = passport_token
@@ -79,10 +87,11 @@ def _wrap_response(
7987
:return: Full response object
8088
"""
8189
dict_response = response.json()
82-
if response.status_code != 200 and self.raise_on_error:
90+
91+
if response.status_code > 201 and self.raise_on_error:
8392
raise HTTPException(
8493
"Receipt API %s HTTP error: %s" % (response.status_code, json.dumps(dict_response)))
85-
elif response.status_code != 200:
94+
elif response.status_code > 201:
8695
return Response(
8796
http_response=dict_response,
8897
pages=[],
@@ -288,55 +297,30 @@ def format_response(json_response, document_type, input_file):
288297
json_response["filepath"] = input_file.filepath
289298
json_response["file_extension"] = input_file.file_extension
290299
pages = []
291-
for page_n, page_prediction in enumerate(json_response["predictions"]):
292-
if document_type == "receipt":
293-
pages.append(
294-
Receipt(
295-
api_prediction=page_prediction,
296-
input_file=input_file,
297-
page_n=page_n
298-
)
299-
)
300-
elif document_type == "invoice":
301-
pages.append(
302-
Invoice(
303-
api_prediction=page_prediction,
304-
input_file=input_file,
305-
page_n=page_n
306-
)
307-
)
308-
elif document_type == "financial_document":
309-
pages.append(
310-
FinancialDocument(
311-
api_prediction=page_prediction,
312-
input_file=input_file,
313-
page_n=page_n
314-
)
315-
)
316-
elif document_type == "passport":
317-
pages.append(
318-
Passport(
319-
api_prediction=page_prediction,
320-
input_file=input_file,
321-
page_n=page_n
322-
)
323-
)
324-
elif document_type == "license_plate":
325-
pages.append(
326-
CarPlate(
327-
api_prediction=page_prediction,
328-
input_file=input_file,
329-
page_n=page_n
330-
)
300+
301+
if document_type not in DOCUMENT_CLASSES.keys():
302+
raise Exception("Document type not supported.")
303+
304+
# Create page level objects
305+
for page_n, page_prediction in enumerate(json_response["document"]["inference"]["pages"]):
306+
pages.append(
307+
DOCUMENT_CLASSES[document_type](
308+
api_prediction=page_prediction["prediction"],
309+
input_file=input_file,
310+
page_n=page_prediction["id"]
331311
)
332-
else:
333-
raise Exception("Document type not supported.")
312+
)
334313

335-
document = Document.merge_pages(pages)
314+
# Create the document level object
315+
document_level = DOCUMENT_CLASSES[document_type](
316+
api_prediction=json_response["document"]["inference"]["prediction"],
317+
input_file=input_file,
318+
page_n="-1"
319+
)
336320

337321
return Response(
338322
http_response=json_response,
339323
pages=pages,
340-
document=document,
324+
document=document_level,
341325
document_type=document_type
342326
)

mindee/documents/__init__.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import copy
2-
3-
41
class Document(object):
52
def __init__(self, input_file=None):
63
self.filepath = None
@@ -24,21 +21,3 @@ def _reconstruct(self, *args):
2421

2522
def all_checks(self):
2623
return all(self.checklist)
27-
28-
@staticmethod
29-
def merge_pages(page_documents):
30-
"""
31-
:param page_documents: Document object list
32-
:return: A single Document where each field is set with the maximum probability field
33-
"""
34-
document = copy.deepcopy(page_documents[0])
35-
attributes = [a for a in dir(document)]
36-
for doc in page_documents:
37-
for attribute in attributes:
38-
if not hasattr(getattr(doc, attribute), "probability"):
39-
continue
40-
41-
if getattr(doc, attribute).probability > getattr(document, attribute).probability:
42-
setattr(document, attribute, getattr(doc, attribute))
43-
44-
return document

mindee/documents/financial_document.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,26 @@ def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
138138
self.company_number = []
139139

140140
def __str__(self):
141-
return "-----Financial document-----\n" \
141+
return "-----Financial Document data-----\n" \
142142
"Filename: %s \n" \
143-
"Total amount: %s \n" \
143+
"Invoice number: %s \n" \
144+
"Total amount including taxes: %s \n" \
145+
"Total amount excluding taxes: %s \n" \
144146
"Date: %s\n" \
145-
"Merchant name: %s\n" \
147+
"Invoice due date: %s\n" \
148+
"Supplier name: %s\n" \
149+
"Taxes: %s\n" \
146150
"Total taxes: %s\n" \
147151
"----------------------" % \
148152
(
149153
self.filename,
154+
self.invoice_number.value,
150155
self.total_incl.value,
156+
self.total_excl.value,
151157
self.date.value,
158+
self.due_date.value,
152159
self.merchant_name.value,
160+
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
153161
self.total_tax.value
154162
)
155163

mindee/documents/invoice.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def __init__(
2626
supplier=None,
2727
payment_details=None,
2828
company_number=None,
29-
vat_number=None,
3029
orientation=None,
3130
total_tax=None,
3231
page_n=0
@@ -106,7 +105,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
106105
self.due_date = Date(api_prediction["due_date"], value_key="value", page_n=page_n)
107106
self.invoice_number = Field(api_prediction["invoice_number"], page_n=page_n)
108107
self.locale = Locale(api_prediction["locale"], value_key="language", page_n=page_n)
109-
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
108+
if str(page_n) != "-1":
109+
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
110110
self.supplier = Field(api_prediction["supplier"], page_n=page_n)
111111
self.taxes = [
112112
Tax(tax_prediction, page_n=page_n, value_key="value") for tax_prediction in api_prediction["taxes"]
@@ -128,6 +128,7 @@ def __str__(self):
128128
"Total amount including taxes: %s \n" \
129129
"Total amount excluding taxes: %s \n" \
130130
"Invoice date: %s\n" \
131+
"Invoice due date: %s\n" \
131132
"Supplier name: %s\n" \
132133
"Taxes: %s\n" \
133134
"Total taxes: %s\n" \
@@ -138,6 +139,7 @@ def __str__(self):
138139
self.total_incl.value,
139140
self.total_excl.value,
140141
self.invoice_date.value,
142+
self.due_date.value,
141143
self.supplier.value,
142144
",".join([str(t.value) + " " + str(t.rate) + "%" for t in self.taxes]),
143145
self.total_tax.value

mindee/documents/receipt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ def build_from_api_prediction(self, api_prediction, page_n=0):
117117
self.taxes = [
118118
Tax(tax_prediction, page_n=page_n, value_key="value", rate_key="rate", code_key="code")
119119
for tax_prediction in api_prediction["taxes"]]
120-
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
120+
if str(page_n) != "-1":
121+
self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
121122
self.total_tax = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
122123
self.total_excl = Amount({"value": None, "probability": 0.}, value_key="value", page_n=page_n)
123124

mindee/fields/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ def __init__(
2727
else:
2828
self.probability = 0.
2929

30-
if "segmentation" in abstract_prediction:
31-
self.bbox = abstract_prediction["segmentation"]["bounding_box"]
30+
if "polygon" in abstract_prediction:
31+
self.bbox = abstract_prediction["polygon"]
3232
else:
3333
self.bbox = []
3434

mindee/http.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def request(url, input_file, token, include_words=False):
1111
"""
1212
input_file.file_object.seek(0)
1313

14-
files = {"file": input_file.file_object.read()}
14+
files = {"document": input_file.file_object.read()}
1515

1616
headers = {"X-Inferuser-Token": token}
1717

@@ -20,7 +20,7 @@ def request(url, input_file, token, include_words=False):
2020
params["include_mvision"] = "true"
2121

2222
response = requests.post(
23-
url+"?include_mvision=True",
23+
url,
2424
files=files,
2525
headers=headers,
2626
data=params

mindee/plots.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import numpy as np
21
import matplotlib.pyplot as plt
32

43

@@ -17,30 +16,35 @@ def autolabel(ax, rects):
1716
ha='center', va='bottom', rotation=90)
1817

1918

20-
def plot_metrics(metrics, accuracies, precisions, save_path):
19+
def plot_metrics(metrics, accuracies, precisions, save_path, savefig=True):
2120
"""
21+
:param savefig: Boolean to specify whether saving the plot as a png file or not
2222
:param metrics: List of metrics names
2323
:param accuracies: List of accuracy values
2424
:param precisions: List of precision values
2525
:param save_path: Path to save the figure
26-
:return: (void) plot the precision and accuracy bar charts and save the figure in save_path
26+
:return: the plt object
2727
"""
28-
x = np.arange(len(metrics)) # the label locations
28+
x_range = [float(k) for k in range(len(metrics))] # the label locations
2929
width = 0.4 # the width of the bars
3030

3131
fig, ax = plt.subplots()
3232
fig.subplots_adjust(bottom=0.15)
33-
rects1 = ax.bar(x - width / 2, accuracies, width, color='#fd3246', label='Accuracy')
34-
rects2 = ax.bar(x + width / 2, precisions, width, color='#007af9', label='Precision')
33+
rects1 = ax.bar([x - width / 2 for x in x_range], accuracies, width, color='#fd3246', label='Accuracy')
34+
rects2 = ax.bar([x + width / 2 for x in x_range], precisions, width, color='#007af9', label='Precision')
3535

3636
autolabel(ax, rects1)
3737
autolabel(ax, rects2)
38+
3839
# Add some text for labels, title and custom x-axis tick labels, etc.
3940
ax.set_ylabel('%')
4041
ax.set_title('Metrics')
41-
ax.set_xticks(x)
42+
ax.set_xticks(x_range)
4243
ax.set_xticklabels(metrics, rotation=45, fontsize=6)
4344
ax.legend(loc='lower left')
4445
plt.grid(True, linestyle='--', color='#e1e1e1', alpha=0.4)
4546

46-
plt.savefig(save_path, dpi=300)
47+
if savefig:
48+
plt.savefig(save_path, dpi=300)
49+
50+
return plt

0 commit comments

Comments
 (0)