11import json
2- from typing import BinaryIO , Dict , List , NamedTuple , Optional , Type
2+ from typing import BinaryIO , Dict , List , NamedTuple , Optional , Type , Union
33
44from mindee import documents
55from mindee .documents .base import Document , TypeDocument
1010 Base64Input ,
1111 BytesInput ,
1212 FileInput ,
13- InputSource ,
13+ LocalInputSource ,
1414 PathInput ,
15+ UrlInputSource ,
1516)
1617from mindee .logger import logger
1718from mindee .response import PredictResponse
@@ -23,13 +24,13 @@ def get_bound_classname(type_var) -> str:
2324
2425
2526class DocumentClient :
26- input_doc : InputSource
27+ input_doc : Union [ LocalInputSource , UrlInputSource ]
2728 doc_configs : DocumentConfigDict
2829 raise_on_error : bool = True
2930
3031 def __init__ (
3132 self ,
32- input_doc : InputSource ,
33+ input_doc : Union [ LocalInputSource , UrlInputSource ] ,
3334 doc_configs : DocumentConfigDict ,
3435 raise_on_error : bool ,
3536 ):
@@ -108,12 +109,13 @@ def parse(
108109
109110 doc_config = self .doc_configs [config_key ]
110111 doc_config .check_api_keys ()
111- if page_options and self .input_doc .is_pdf ():
112- self .input_doc .process_pdf (
113- page_options .operation ,
114- page_options .on_min_pages ,
115- page_options .page_indexes ,
116- )
112+ if not isinstance (self .input_doc , UrlInputSource ):
113+ if page_options and self .input_doc .is_pdf ():
114+ self .input_doc .process_pdf (
115+ page_options .operation ,
116+ page_options .on_min_pages ,
117+ page_options .page_indexes ,
118+ )
117119 return self ._make_request (
118120 document_class , doc_config , include_words , close_file , cropper
119121 )
@@ -152,7 +154,8 @@ def _make_request(
152154
153155 def close (self ) -> None :
154156 """Close the file object."""
155- self .input_doc .file_object .close ()
157+ if not isinstance (self .input_doc , UrlInputSource ):
158+ self .input_doc .file_object .close ()
156159
157160
158161class ConfigSpec (NamedTuple ):
@@ -397,3 +400,21 @@ def doc_from_bytes(
397400 doc_configs = self ._doc_configs ,
398401 raise_on_error = self .raise_on_error ,
399402 )
403+
404+ def doc_from_url (
405+ self ,
406+ url : str ,
407+ ) -> DocumentClient :
408+ """
409+ Load a document from a URL.
410+
411+ :param url: URL of the document to load
412+ """
413+ input_doc = UrlInputSource (
414+ url ,
415+ )
416+ return DocumentClient (
417+ input_doc = input_doc ,
418+ doc_configs = self ._doc_configs ,
419+ raise_on_error = self .raise_on_error ,
420+ )
0 commit comments