diff --git a/docs/extras/code_samples/bank_account_details_v1.txt b/docs/extras/code_samples/bank_account_details_v1.txt index efe682b3..380d0adb 100644 --- a/docs/extras/code_samples/bank_account_details_v1.txt +++ b/docs/extras/code_samples/bank_account_details_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/bank_account_details_v2.txt b/docs/extras/code_samples/bank_account_details_v2.txt index 87fd38ca..85ea7a96 100644 --- a/docs/extras/code_samples/bank_account_details_v2.txt +++ b/docs/extras/code_samples/bank_account_details_v2.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/bank_check_v1.txt b/docs/extras/code_samples/bank_check_v1.txt index 3fceaa09..d0550e6b 100644 --- a/docs/extras/code_samples/bank_check_v1.txt +++ b/docs/extras/code_samples/bank_check_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/barcode_reader_v1.txt b/docs/extras/code_samples/barcode_reader_v1.txt index 94983824..637a079d 100644 --- a/docs/extras/code_samples/barcode_reader_v1.txt +++ b/docs/extras/code_samples/barcode_reader_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/carte_grise_v1.txt b/docs/extras/code_samples/carte_grise_v1.txt index 0b4dedbb..b87ca17c 100644 --- a/docs/extras/code_samples/carte_grise_v1.txt +++ b/docs/extras/code_samples/carte_grise_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/cropper_v1.txt b/docs/extras/code_samples/cropper_v1.txt index f90d4a10..e7998667 100644 --- a/docs/extras/code_samples/cropper_v1.txt +++ b/docs/extras/code_samples/cropper_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/custom_v1.txt b/docs/extras/code_samples/custom_v1.txt index 2ee216fe..6e34167b 100644 --- a/docs/extras/code_samples/custom_v1.txt +++ b/docs/extras/code_samples/custom_v1.txt @@ -1,4 +1,5 @@ from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") @@ -10,7 +11,7 @@ my_endpoint = mindee_client.create_endpoint( ) # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Parse the file. # The endpoint must be specified since it cannot be determined from the class. diff --git a/docs/extras/code_samples/default.txt b/docs/extras/code_samples/default.txt index b5df084c..d3380865 100644 --- a/docs/extras/code_samples/default.txt +++ b/docs/extras/code_samples/default.txt @@ -4,6 +4,7 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") @@ -17,7 +18,7 @@ my_endpoint = mindee_client.create_endpoint( ) # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Parse the file. # The endpoint must be specified since it cannot be determined from the class. diff --git a/docs/extras/code_samples/default_async.txt b/docs/extras/code_samples/default_async.txt index 790ddfc3..638b1da3 100644 --- a/docs/extras/code_samples/default_async.txt +++ b/docs/extras/code_samples/default_async.txt @@ -4,6 +4,7 @@ # from mindee.v1 import Client, AsyncPredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") @@ -17,7 +18,7 @@ my_endpoint = mindee_client.create_endpoint( ) # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Parse the file. # The endpoint must be specified since it cannot be determined from the class. diff --git a/docs/extras/code_samples/expense_receipts_v5.txt b/docs/extras/code_samples/expense_receipts_v5.txt index cb1b552b..e52b53b2 100644 --- a/docs/extras/code_samples/expense_receipts_v5.txt +++ b/docs/extras/code_samples/expense_receipts_v5.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/expense_receipts_v5_async.txt b/docs/extras/code_samples/expense_receipts_v5_async.txt index 266a0db9..0a5bf42d 100644 --- a/docs/extras/code_samples/expense_receipts_v5_async.txt +++ b/docs/extras/code_samples/expense_receipts_v5_async.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/docs/extras/code_samples/financial_document_v1.txt b/docs/extras/code_samples/financial_document_v1.txt index df1e246a..1585e9cf 100644 --- a/docs/extras/code_samples/financial_document_v1.txt +++ b/docs/extras/code_samples/financial_document_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/financial_document_v1_async.txt b/docs/extras/code_samples/financial_document_v1_async.txt index 483a8e3f..ff598b3d 100644 --- a/docs/extras/code_samples/financial_document_v1_async.txt +++ b/docs/extras/code_samples/financial_document_v1_async.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/docs/extras/code_samples/idcard_fr_v1.txt b/docs/extras/code_samples/idcard_fr_v1.txt index 125219b3..84ac3246 100644 --- a/docs/extras/code_samples/idcard_fr_v1.txt +++ b/docs/extras/code_samples/idcard_fr_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/idcard_fr_v2.txt b/docs/extras/code_samples/idcard_fr_v2.txt index 1f64f154..e6a00bea 100644 --- a/docs/extras/code_samples/idcard_fr_v2.txt +++ b/docs/extras/code_samples/idcard_fr_v2.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/international_id_v2_async.txt b/docs/extras/code_samples/international_id_v2_async.txt index 5534b626..ad88ff95 100644 --- a/docs/extras/code_samples/international_id_v2_async.txt +++ b/docs/extras/code_samples/international_id_v2_async.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/docs/extras/code_samples/invoice_splitter_v1_async.txt b/docs/extras/code_samples/invoice_splitter_v1_async.txt index 9ea947e3..568e8f4b 100644 --- a/docs/extras/code_samples/invoice_splitter_v1_async.txt +++ b/docs/extras/code_samples/invoice_splitter_v1_async.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/docs/extras/code_samples/invoices_v4.txt b/docs/extras/code_samples/invoices_v4.txt index a903df6f..0d09eb4e 100644 --- a/docs/extras/code_samples/invoices_v4.txt +++ b/docs/extras/code_samples/invoices_v4.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/invoices_v4_async.txt b/docs/extras/code_samples/invoices_v4_async.txt index a221fa2a..2a117c3e 100644 --- a/docs/extras/code_samples/invoices_v4_async.txt +++ b/docs/extras/code_samples/invoices_v4_async.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/docs/extras/code_samples/multi_receipts_detector_v1.txt b/docs/extras/code_samples/multi_receipts_detector_v1.txt index 0537611a..8cf09a8b 100644 --- a/docs/extras/code_samples/multi_receipts_detector_v1.txt +++ b/docs/extras/code_samples/multi_receipts_detector_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/passport_v1.txt b/docs/extras/code_samples/passport_v1.txt index b372ccef..3ac8d957 100644 --- a/docs/extras/code_samples/passport_v1.txt +++ b/docs/extras/code_samples/passport_v1.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, PredictResponse, product +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and parse it. result: PredictResponse = mindee_client.parse( diff --git a/docs/extras/code_samples/workflow_execution.txt b/docs/extras/code_samples/workflow_execution.txt index 69f0297d..4a201830 100644 --- a/docs/extras/code_samples/workflow_execution.txt +++ b/docs/extras/code_samples/workflow_execution.txt @@ -5,6 +5,7 @@ from mindee.v1 import Client, WorkflowResponse from mindee.v1.parsing.common import ExecutionPriority +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") @@ -12,7 +13,7 @@ mindee_client = Client(api_key="my-api-key") workflow_id = "workflow-id" # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Send the file to the workflow. result: WorkflowResponse = mindee_client.execute_workflow( diff --git a/docs/extras/code_samples/workflow_ots_rag.txt b/docs/extras/code_samples/workflow_ots_rag.txt index b457ff1b..ded746c8 100644 --- a/docs/extras/code_samples/workflow_ots_rag.txt +++ b/docs/extras/code_samples/workflow_ots_rag.txt @@ -4,12 +4,13 @@ # from mindee.v1 import Client, product, AsyncPredictResponse +from mindee import PathInput # Init a new client mindee_client = Client(api_key="my-api-key") # Load a file from disk -input_doc = mindee_client.source_from_path("/path/to/the/file.ext") +input_doc = PathInput("/path/to/the/file.ext") # Load a file from disk and enqueue it. result: AsyncPredictResponse = mindee_client.enqueue_and_parse( diff --git a/mindee/client_mixin.py b/mindee/client_mixin.py index 632bb641..8a4c5753 100644 --- a/mindee/client_mixin.py +++ b/mindee/client_mixin.py @@ -1,81 +1,9 @@ -from pathlib import Path -from typing import BinaryIO, Union - from mindee.error import MindeeClientError -from mindee.input.base_64_input import Base64Input -from mindee.input.bytes_input import BytesInput -from mindee.input.file_input import FileInput -from mindee.input.path_input import PathInput -from mindee.input.url_input_source import URLInputSource class ClientMixin: """Mixin for clients V1 & V2 common static methods.""" - @staticmethod - def source_from_path( - input_path: Union[Path, str], fix_pdf: bool = False - ) -> PathInput: - """ - Load a document from a path, as a string or a `Path` object. - - :params input_path: Path of file to open - :params fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = PathInput(input_path) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_file(input_file: BinaryIO, fix_pdf: bool = False) -> FileInput: - """ - Load a document from a normal Python file object/handle. - - :params input_file: Input file handle - :params fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = FileInput(input_file) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_b64string( - input_string: str, filename: str, fix_pdf: bool = False - ) -> Base64Input: - """ - Load a document from a base64 encoded string. - - :params input_string: Input to parse as base64 string - :params filename: The name of the file (without the path) - :params fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = Base64Input(input_string, filename) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - - @staticmethod - def source_from_bytes( - input_bytes: bytes, filename: str, fix_pdf: bool = False - ) -> BytesInput: - """ - Load a document from raw bytes. - - :params input_bytes: Raw byte input - :params filename: The name of the file (without the path) - :params fix_pdf: Whether to attempt fixing PDF files before sending. - Setting this to `True` can modify the data sent to Mindee. - """ - input_doc = BytesInput(input_bytes, filename) - if fix_pdf: - input_doc.fix_pdf() - return input_doc - @staticmethod def _validate_async_params( initial_delay_sec: float, delay_sec: float, max_retries: int @@ -93,16 +21,3 @@ def _validate_async_params( ) if max_retries < min_retries: raise MindeeClientError(f"Cannot set retries to less than {min_retries}.") - - @staticmethod - def source_from_url( - url: str, - ) -> URLInputSource: - """ - Load a document from a URL. - - :params url: Raw byte input - """ - return URLInputSource( - url, - ) diff --git a/mindee/commands/cli_parser.py b/mindee/commands/cli_parser.py index e08799f9..577ccef2 100644 --- a/mindee/commands/cli_parser.py +++ b/mindee/commands/cli_parser.py @@ -2,7 +2,14 @@ from argparse import ArgumentParser, Namespace from typing import Optional, Type, Union -from mindee import LocalInputSource, URLInputSource +from mindee import ( + Base64Input, + BytesInput, + FileInput, + LocalInputSource, + PathInput, + URLInputSource, +) from mindee.v1.client import Client, Endpoint from mindee.commands.cli_products import PRODUCTS, CommandConfig from mindee.error.mindee_error import MindeeClientError @@ -265,17 +272,13 @@ def _get_input_doc(self) -> Union[LocalInputSource, URLInputSource]: """Loads an input document.""" if self.parsed_args.input_type == "file": with open(self.parsed_args.path, "rb", buffering=30) as file_handle: - return self.client.source_from_file(file_handle) + return FileInput(file_handle) elif self.parsed_args.input_type == "base64": with open(self.parsed_args.path, "rt", encoding="ascii") as base64_handle: - return self.client.source_from_b64string( - base64_handle.read(), "test.jpg" - ) + return Base64Input(base64_handle.read(), "test.jpg") elif self.parsed_args.input_type == "bytes": with open(self.parsed_args.path, "rb") as bytes_handle: - return self.client.source_from_bytes( - bytes_handle.read(), bytes_handle.name - ) + return BytesInput(bytes_handle.read(), bytes_handle.name) elif self.parsed_args.input_type == "url": - return self.client.source_from_url(self.parsed_args.path) - return self.client.source_from_path(self.parsed_args.path) + return URLInputSource(self.parsed_args.path) + return PathInput(self.parsed_args.path) diff --git a/mindee/input/file_input.py b/mindee/input/file_input.py index 2c100667..cc4f1dd8 100644 --- a/mindee/input/file_input.py +++ b/mindee/input/file_input.py @@ -1,5 +1,6 @@ +import io import os -from typing import BinaryIO +from typing import BinaryIO, IO, Union, cast from mindee.input.local_input_source import LocalInputSource @@ -7,7 +8,7 @@ class FileInput(LocalInputSource): """A binary file input.""" - def __init__(self, file: BinaryIO) -> None: + def __init__(self, file: Union[BinaryIO, IO[bytes]]) -> None: """ Input document from a Python binary file object. @@ -17,7 +18,12 @@ def __init__(self, file: BinaryIO) -> None: """ assert file.name, "File name must be set" - self.file_object = file + if hasattr(file, "seek") and callable(file.seek): + try: + file.seek(0) + except (io.UnsupportedOperation, OSError): + pass + self.file_object = cast(BinaryIO, file) self.filename = os.path.basename(file.name) self.filepath = file.name super().__init__() diff --git a/tests/v1/extras/test_extras_integration.py b/tests/v1/extras/test_extras_integration.py index 4e7b83b9..6986c696 100644 --- a/tests/v1/extras/test_extras_integration.py +++ b/tests/v1/extras/test_extras_integration.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.international_id import InternationalIdV2 from mindee.v1.product.invoice.invoice_v4 import InvoiceV4 @@ -14,7 +15,7 @@ def client(): @pytest.mark.integration def test_send_cropper_extra(client): - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", ) response = client.parse(InvoiceV4, sample, cropper=True) @@ -23,7 +24,7 @@ def test_send_cropper_extra(client): @pytest.mark.integration def test_send_full_text_ocr_extra(client): - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "international_id" / "default_sample.jpg", ) response = client.enqueue_and_parse(InternationalIdV2, sample, full_text=True) diff --git a/tests/v1/input/test_url_input_source_integration.py b/tests/v1/input/test_url_input_source_integration.py index 9d688c41..65f812a8 100644 --- a/tests/v1/input/test_url_input_source_integration.py +++ b/tests/v1/input/test_url_input_source_integration.py @@ -3,6 +3,7 @@ import pytest +from mindee import URLInputSource from mindee.v1.client import Client from mindee.v1.product.invoice import InvoiceV4 from tests.utils import cleanup_output_files @@ -25,7 +26,7 @@ def reference_file_path(): @pytest.mark.integration def test_load_local_file(client, reference_file_path): - url_source = client.source_from_url(reference_file_path) + url_source = URLInputSource(reference_file_path) local_source = url_source.as_local_input_source() result = client.parse(InvoiceV4, local_source) assert result.document.n_pages == 5 @@ -34,7 +35,7 @@ def test_load_local_file(client, reference_file_path): @pytest.mark.integration def test_custom_file_name(client, reference_file_path): - url_source = client.source_from_url(reference_file_path) + url_source = URLInputSource(reference_file_path) local_source = url_source.as_local_input_source("customName.pdf") result = client.parse(InvoiceV4, local_source) assert result.document.n_pages == 5 @@ -43,14 +44,14 @@ def test_custom_file_name(client, reference_file_path): @pytest.mark.integration def test_save_file(client, reference_file_path, output_file_path): - url_source = client.source_from_url(reference_file_path) + url_source = URLInputSource(reference_file_path) url_source.save_to_file(output_file_path) assert os.path.exists(os.path.join(output_file_path, "invoice_5p.pdf")) @pytest.mark.integration def test_save_file_with_filename(client, reference_file_path, output_file_path): - url_source = client.source_from_url(reference_file_path) + url_source = URLInputSource(reference_file_path) url_source.save_to_file(output_file_path, "customFileName.pdf") assert os.path.exists(os.path.join(output_file_path, "customFileName.pdf")) diff --git a/tests/v1/mindee_http/test_error.py b/tests/v1/mindee_http/test_error.py index 1115dc84..a78c3895 100644 --- a/tests/v1/mindee_http/test_error.py +++ b/tests/v1/mindee_http/test_error.py @@ -26,10 +26,8 @@ def dummy_client(monkeypatch) -> Client: @pytest.fixture -def dummy_file(monkeypatch) -> PathInput: - clear_envvars(monkeypatch) - c = Client(api_key="dummy-client") - return c.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") +def dummy_file() -> PathInput: + return PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") def test_http_client_error(dummy_client: Client, dummy_file: PathInput): @@ -60,11 +58,11 @@ def test_http_400_error(): error_obj["status_code"] = 400 error_400 = handle_error("dummy-url", error_obj) with pytest.raises(MindeeHTTPClientError): + assert error_400.status_code == 400 + assert error_400.api_code == "SomeCode" + assert error_400.api_message == "Some scary message here" + assert error_400.api_details is None raise error_400 - assert error_400.status_code == 400 - assert error_400.api_code == "SomeCode" - assert error_400.api_message == "Some scary message here" - assert error_400.api_details is None def test_http_401_error(): diff --git a/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py index 834aa11c..22a67061 100644 --- a/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py +++ b/tests/v1/product/barcode_reader/test_barcode_reader_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.barcode_reader import BarcodeReaderV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "barcode_reader" / "default_sample.jpg", ) response = client.parse(BarcodeReaderV1, sample) diff --git a/tests/v1/product/cropper/test_cropper_v1_regression.py b/tests/v1/product/cropper/test_cropper_v1_regression.py index dc5096bb..ffee6eab 100644 --- a/tests/v1/product/cropper/test_cropper_v1_regression.py +++ b/tests/v1/product/cropper/test_cropper_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.cropper.cropper_v1 import CropperV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "cropper" / "default_sample.jpg", ) response = client.parse(CropperV1, sample) diff --git a/tests/v1/product/financial_document/test_financial_document_v1_regression.py b/tests/v1/product/financial_document/test_financial_document_v1_regression.py index a08f9cc1..49851621 100644 --- a/tests/v1/product/financial_document/test_financial_document_v1_regression.py +++ b/tests/v1/product/financial_document/test_financial_document_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.financial_document.financial_document_v1 import ( FinancialDocumentV1, @@ -20,7 +21,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg", ) response = client.parse(FinancialDocumentV1, sample) diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py index 11fa0940..20a428da 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.fr.bank_account_details.bank_account_details_v1 import ( BankAccountDetailsV1, @@ -20,7 +21,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", ) response = client.parse(BankAccountDetailsV1, sample) diff --git a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py index e65d1d4c..68e7e2c3 100644 --- a/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py +++ b/tests/v1/product/fr/bank_account_details/test_bank_account_details_v2_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.fr.bank_account_details.bank_account_details_v2 import ( BankAccountDetailsV2, @@ -20,7 +21,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "bank_account_details" / "default_sample.jpg", ) response = client.parse(BankAccountDetailsV2, sample) diff --git a/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py index 56fbbb45..1651f59c 100644 --- a/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py +++ b/tests/v1/product/fr/carte_grise/test_carte_grise_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.fr.carte_grise.carte_grise_v1 import CarteGriseV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "carte_grise" / "default_sample.jpg", ) response = client.parse(CarteGriseV1, sample) diff --git a/tests/v1/product/fr/id_card/test_id_card_v1_regression.py b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py index ab1986bb..620ac96a 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v1_regression.py +++ b/tests/v1/product/fr/id_card/test_id_card_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.fr.id_card.id_card_v1 import IdCardV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", ) response = client.parse(IdCardV1, sample) diff --git a/tests/v1/product/fr/id_card/test_id_card_v2_regression.py b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py index 6f29a5d1..ce6b4f05 100644 --- a/tests/v1/product/fr/id_card/test_id_card_v2_regression.py +++ b/tests/v1/product/fr/id_card/test_id_card_v2_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.fr.id_card.id_card_v2 import IdCardV2 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "idcard_fr" / "default_sample.jpg", ) response = client.parse(IdCardV2, sample) diff --git a/tests/v1/product/invoice/test_invoice_v4_regression.py b/tests/v1/product/invoice/test_invoice_v4_regression.py index c29d2887..0b38743f 100644 --- a/tests/v1/product/invoice/test_invoice_v4_regression.py +++ b/tests/v1/product/invoice/test_invoice_v4_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.invoice.invoice_v4 import InvoiceV4 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "invoices" / "default_sample.jpg", ) response = client.parse(InvoiceV4, sample) diff --git a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py index 277f5214..58bf45a2 100644 --- a/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py +++ b/tests/v1/product/invoice_splitter/test_invoice_splitter_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.invoice_splitter.invoice_splitter_v1 import InvoiceSplitterV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,9 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( - V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf" - ) + sample = PathInput(V1_PRODUCT_DATA_DIR / "invoice_splitter" / "default_sample.pdf") response = client.enqueue_and_parse(InvoiceSplitterV1, sample) doc_response = response.document diff --git a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py index 0c221973..062a304c 100644 --- a/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py +++ b/tests/v1/product/multi_receipts_detector/test_multi_receipts_detector_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.multi_receipts_detector.multi_receipts_detector_v1 import ( MultiReceiptsDetectorV1, @@ -20,7 +21,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "multi_receipts_detector" / "default_sample.jpg", ) response = client.parse(MultiReceiptsDetectorV1, sample) diff --git a/tests/v1/product/passport/test_passport_v1_regression.py b/tests/v1/product/passport/test_passport_v1_regression.py index 0c95e07a..ff36f803 100644 --- a/tests/v1/product/passport/test_passport_v1_regression.py +++ b/tests/v1/product/passport/test_passport_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.passport import PassportV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "passport" / "default_sample.jpg", ) response = client.parse(PassportV1, sample) diff --git a/tests/v1/product/receipt/test_receipt_v5_regression.py b/tests/v1/product/receipt/test_receipt_v5_regression.py index 7792d8e1..6029d590 100644 --- a/tests/v1/product/receipt/test_receipt_v5_regression.py +++ b/tests/v1/product/receipt/test_receipt_v5_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.receipt.receipt_v5 import ReceiptV5 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "expense_receipts" / "default_sample.jpg", ) response = client.parse(ReceiptV5, sample) diff --git a/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py index 9063e712..a8c4287d 100644 --- a/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py +++ b/tests/v1/product/us/bank_check/test_bank_check_v1_regression.py @@ -1,5 +1,6 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.product.us.bank_check.bank_check_v1 import BankCheckV1 from tests.utils import V1_PRODUCT_DATA_DIR @@ -15,7 +16,7 @@ def test_default_sample(): ) as rst_file: rst_ref = rst_file.read() - sample = client.source_from_path( + sample = PathInput( V1_PRODUCT_DATA_DIR / "bank_check" / "default_sample.jpg", ) response = client.parse(BankCheckV1, sample) diff --git a/tests/v1/test_client.py b/tests/v1/test_client.py index 6dc3f5db..45b35c72 100644 --- a/tests/v1/test_client.py +++ b/tests/v1/test_client.py @@ -2,6 +2,7 @@ import pytest +from mindee import Base64Input, PathInput from mindee.v1 import ( AsyncPredictResponse, Client, @@ -46,32 +47,32 @@ def dummy_client() -> Client: def test_parse_path_without_token(empty_client: Client): with pytest.raises(RuntimeError): - input_doc = empty_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") empty_client.parse(product.ReceiptV5, input_doc) def test_parse_path_with_env_token(env_client: Client): with pytest.raises(MindeeHTTPError): - input_doc = env_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") env_client.parse(product.ReceiptV5, input_doc) def test_parse_path_with_wrong_filetype(dummy_client: Client): with pytest.raises(AssertionError): - dummy_client.source_from_path(FILE_TYPES_DIR / "receipt.jpga") + PathInput(FILE_TYPES_DIR / "receipt.jpga") def test_parse_path_with_wrong_token(dummy_client: Client): with pytest.raises(MindeeHTTPError): - input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") dummy_client.parse(product.ReceiptV5, input_doc) def test_request_with_wrong_type(dummy_client: Client): with pytest.raises(FileNotFoundError): - dummy_client.source_from_path(open("./tests/data/test.txt").read()) + PathInput(open("./tests/data/test.txt").read()) with pytest.raises(binascii.Error): - dummy_client.source_from_b64string("./tests/data/test.txt", "test.jpg") + Base64Input("./tests/data/test.txt", "test.jpg") def test_interface_version(dummy_client: Client): @@ -81,14 +82,12 @@ def test_interface_version(dummy_client: Client): version="1.1", ) with pytest.raises(MindeeHTTPError): - input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "receipt.jpg") + input_doc = PathInput(FILE_TYPES_DIR / "receipt.jpg") dummy_client.parse(product.CustomV1, input_doc, endpoint=dummy_endpoint) def test_keep_file_open(dummy_client: Client): - input_doc: LocalInputSource = dummy_client.source_from_path( - f"{FILE_TYPES_DIR}/receipt.jpg" - ) + input_doc: LocalInputSource = PathInput(f"{FILE_TYPES_DIR}/receipt.jpg") try: dummy_client.parse(product.ReceiptV5, input_doc, close_file=False) except MindeeHTTPError: @@ -99,9 +98,7 @@ def test_keep_file_open(dummy_client: Client): def test_cut_options(dummy_client: Client): - input_doc: LocalInputSource = dummy_client.source_from_path( - f"{FILE_TYPES_DIR}/pdf/multipage.pdf" - ) + input_doc: LocalInputSource = PathInput(f"{FILE_TYPES_DIR}/pdf/multipage.pdf") try: # need to keep file open to count the pages after parsing dummy_client.parse( @@ -117,7 +114,7 @@ def test_cut_options(dummy_client: Client): def test_async_wrong_initial_delay(dummy_client: Client): - input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") with pytest.raises(MindeeClientError): dummy_client.enqueue_and_parse( InvoiceSplitterV1, input_doc, initial_delay_sec=0 @@ -125,7 +122,7 @@ def test_async_wrong_initial_delay(dummy_client: Client): def test_async_wrong_polling_delay(dummy_client: Client): - input_doc = dummy_client.source_from_path(FILE_TYPES_DIR / "pdf" / "blank.pdf") + input_doc = PathInput(FILE_TYPES_DIR / "pdf" / "blank.pdf") with pytest.raises(MindeeClientError): dummy_client.enqueue_and_parse(InvoiceSplitterV1, input_doc, delay_sec=0) diff --git a/tests/v1/workflows/test_workflow_integration.py b/tests/v1/workflows/test_workflow_integration.py index 9e2807e6..00544a9c 100644 --- a/tests/v1/workflows/test_workflow_integration.py +++ b/tests/v1/workflows/test_workflow_integration.py @@ -3,6 +3,7 @@ import pytest +from mindee import PathInput from mindee.v1.client import Client from mindee.v1.client_options.workflow_options import WorkflowOptions from mindee.v1.parsing.common import ExecutionPriority @@ -27,7 +28,7 @@ def input_path(): @pytest.mark.integration def test_workflow_execution(mindee_client: Client, workflow_id: str, input_path: str): - input_source = mindee_client.source_from_path(str(input_path)) + input_source = PathInput(str(input_path)) current_date_time = datetime.now().strftime("%Y-%m-%d-%H:%M:%S") alias = f"python-{current_date_time}" priority = ExecutionPriority.LOW @@ -44,7 +45,7 @@ def test_workflow_execution(mindee_client: Client, workflow_id: str, input_path: def test_workflow_predict_ots_rag( mindee_client: Client, workflow_id: str, input_path: str ): - input_source = mindee_client.source_from_path(str(input_path)) + input_source = PathInput(str(input_path)) response = mindee_client.enqueue_and_parse( FinancialDocumentV1, @@ -59,7 +60,7 @@ def test_workflow_predict_ots_rag( def test_workflow_predict_ots_no_rag( mindee_client: Client, workflow_id: str, input_path: str ): - input_source = mindee_client.source_from_path(str(input_path)) + input_source = PathInput(str(input_path)) response = mindee_client.enqueue_and_parse( FinancialDocumentV1, @@ -78,7 +79,7 @@ def test_workflow_predict_custom_rag( endpoint_name="financial_document", ) - input_source = mindee_client.source_from_path(str(input_path)) + input_source = PathInput(str(input_path)) response = mindee_client.enqueue_and_parse( GeneratedV1, @@ -99,7 +100,7 @@ def test_workflow_predict_custom_no_rag( endpoint_name="financial_document", ) - input_source = mindee_client.source_from_path(str(input_path)) + input_source = PathInput(str(input_path)) response = mindee_client.enqueue_and_parse( GeneratedV1, diff --git a/tests/v2/test_client.py b/tests/v2/test_client.py index 1fbe533a..ded27855 100644 --- a/tests/v2/test_client.py +++ b/tests/v2/test_client.py @@ -126,18 +126,14 @@ def test_enqueue_path_with_env_token(custom_base_url_client): assert custom_base_url_client.mindee_api.api_key == "dummy" assert custom_base_url_client.mindee_api.base_headers["Authorization"] == "dummy" assert custom_base_url_client.mindee_api.base_headers["User-Agent"] == USER_AGENT - input_doc: LocalInputSource = custom_base_url_client.source_from_path( - f"{FILE_TYPES_DIR}/receipt.jpg" - ) + input_doc: LocalInputSource = PathInput(f"{FILE_TYPES_DIR}/receipt.jpg") with pytest.raises(MindeeHTTPErrorV2): custom_base_url_client.enqueue(input_doc, ExtractionParameters("dummy-model")) @pytest.mark.v2 def test_enqueue_and_parse_path_with_env_token(custom_base_url_client): - input_doc: LocalInputSource = custom_base_url_client.source_from_path( - f"{FILE_TYPES_DIR}/receipt.jpg" - ) + input_doc: LocalInputSource = PathInput(f"{FILE_TYPES_DIR}/receipt.jpg") with pytest.raises(MindeeHTTPErrorV2): custom_base_url_client.enqueue_and_get_result( ExtractionResponse,