Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ updates:
groups:
runtime-dependencies:
patterns:
- "pytz"
- "requests"
optional-dependencies:
patterns:
Expand Down
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ repos:
exclude: "tests/|examples/|docs/"
additional_dependencies:
- toml
- types-pytz
- types-requests
- types-setuptools
- importlib-metadata
Expand All @@ -55,4 +54,4 @@ repos:
entry: make -C docs linkcheck
language: system
pass_filenames: false
files: ^docs/.*$|^mindee/.*\.py$
files: ^docs/.*$|^mindee/.*\.py$
6 changes: 0 additions & 6 deletions mindee/geometry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@
from mindee.geometry.point import Point, Points
from mindee.geometry.polygon import (
Polygon,
is_point_in_polygon_x,
is_point_in_polygon_y,
merge_polygons,
polygon_from_prediction,
)
from mindee.geometry.polygon_utils import get_centroid, is_point_in_x, is_point_in_y
from mindee.geometry.quadrilateral import (
Expand All @@ -27,11 +24,8 @@
"get_centroid",
"get_min_max_x",
"get_min_max_y",
"is_point_in_polygon_x",
"is_point_in_polygon_y",
"is_point_in_x",
"is_point_in_y",
"merge_polygons",
"polygon_from_prediction",
"quadrilateral_from_prediction",
]
37 changes: 0 additions & 37 deletions mindee/geometry/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,43 +55,6 @@ def __str__(self):
return "(" + ", ".join(str(p) for p in self) + ")"


def is_point_in_polygon_x(point: Point, polygon: Polygon) -> bool:
"""
Deprecated, use ``is_point_in_x`` from ``Polygon`` class instead.

Determine if the Point is in the Polygon's X-axis.

:param point: Point to compare
:param polygon: Polygon to look into
"""
min_x, max_x = get_min_max_x(polygon)
return is_point_in_x(point, min_x, max_x)


def is_point_in_polygon_y(point: Point, polygon: Polygon) -> bool:
"""
Deprecated, use ``is_point_in_y`` from ``Polygon`` class instead.

Determine if the Point is in the Polygon's Y-axis.

:param point: Point to compare
:param polygon: Polygon to look into
"""
min_y, max_y = get_min_max_y(polygon)
return is_point_in_y(point, min_y, max_y)


def polygon_from_prediction(prediction: Sequence[list[float]]) -> Polygon:
"""
Deprecated, init ``Polygon`` class directly instead.

Transform a prediction into a Polygon.

:param prediction: API prediction.
"""
return Polygon([Point(point[0], point[1]) for point in prediction])


def merge_polygons(vertices: Sequence[Polygon]) -> Polygon:
"""
Given a sequence of polygons, calculate a polygon box that encompasses all polygons.
Expand Down
6 changes: 0 additions & 6 deletions mindee/image/extracted_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,6 @@ def save_to_file(self, output_path: Path | str, file_format: str | None = None):
print(exc)
raise MindeeError(f"Could not save file {Path(output_path).name}.") from exc

def as_source(self) -> FileInput:
"""
Deprecated. Use ``as_input_source`` instead.
"""
return self.as_input_source()

def as_input_source(self) -> FileInput:
"""
Return the file as a Mindee-compatible BufferInput source.
Expand Down
45 changes: 22 additions & 23 deletions mindee/input/local_input_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,27 @@ class LocalInputSource:
filename: str
file_mimetype: str
filepath: str | None
_page_count: int | None = None
page_count: int

def __init__(self) -> None:
    """
    Initialize a LocalInputSource object.

    Checks the mimetype, then records the page count: the number of pages
    reported by pdfium for a PDF, ``0`` when the PDF cannot be opened, and
    ``1`` for any non-PDF input.
    """
    self._check_mimetype()

    if self.is_pdf():
        self.file_object.seek(0)
        pdf = None
        try:
            pdf = pdfium.PdfDocument(self.file_object)
            self.page_count = len(pdf)
        except pdfium.PdfiumError as exc:
            logger.warning(
                "Could not open PDF file: %s due to %s", self.filename, exc
            )
            self.page_count = 0
        finally:
            # Release the pdfium handle, as the other PDF helpers in this
            # module do once they are finished with a document.
            if pdf is not None:
                pdf.close()
        # Rewind so later readers start from the beginning of the file.
        self.file_object.seek(0)
    else:
        self.page_count = 1
    # Pass the class name itself: wrapping it in braces built a set, which
    # was logged as "{'ClassName'}".
    logger.debug(
        "Loaded new input '%s' from %s", self.filename, type(self).__name__
    )
Expand Down Expand Up @@ -103,26 +116,6 @@ def is_pdf(self) -> bool:
""":return: True if the file is a PDF."""
return self.file_mimetype == "application/pdf"

@property
def page_count(self) -> int:
"""
Count the pages in the document.

:return: The number of pages.
"""
if self._page_count is None:
if self.is_pdf():
self.file_object.seek(0)
pdf = pdfium.PdfDocument(self.file_object)
self._page_count = len(pdf)
else:
self._page_count = 1
return self._page_count

def count_doc_pages(self) -> int:
"""Deprecated. Use ``page_count`` instead."""
return self.page_count

def apply_page_options(self, page_options: PageOptions) -> None:
"""Apply cut and merge options on multipage documents."""
if not self.is_pdf():
Expand All @@ -132,12 +125,16 @@ def apply_page_options(self, page_options: PageOptions) -> None:
page_options.on_min_pages,
page_options.page_indexes,
)
self.file_object.seek(0)
pdf = pdfium.PdfDocument(self.file_object)
self.page_count = len(pdf)
pdf.close()

def process_pdf(
self,
behavior: str,
on_min_pages: int,
page_indexes: Sequence,
page_indexes: Sequence[int],
) -> None:
"""Run any required processing on a PDF file."""
if self.is_pdf_empty():
Expand Down Expand Up @@ -183,7 +180,9 @@ def merge_pdf_pages(self, page_numbers: set) -> None:
bytes_io = io.BytesIO()
new_pdf.save(bytes_io)
self.file_object = bytes_io
self._page_count = len(new_pdf)
self.page_count = len(new_pdf)
new_pdf.close()
pdf.close()

def is_pdf_empty(self) -> bool:
"""
Expand Down
4 changes: 0 additions & 4 deletions mindee/pdf/extracted_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ def get_page_count(self) -> int:
"Could not retrieve page count from Extracted PDF object."
) from exc

def write_to_file(self, output_path: str):
"""Deprecated. Use ``save_to_file`` instead."""
self.save_to_file(output_path)

def save_to_file(self, output_path: Path | str):
"""
Writes the contents of the current PDF object to a file.
Expand Down
11 changes: 9 additions & 2 deletions mindee/pdf/pdf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,17 @@ def has_source_text(pdf_bytes: bytes) -> bool:
Checks if the provided PDF bytes contain source text.

:param pdf_bytes: Raw bytes representation of a PDF file
:return:
:return: True if source text is found, False otherwise.
"""
pdf = pdfium.PdfDocument(pdf_bytes)
return any(len(page.get_textpage().get_text_bounded().strip()) > 0 for page in pdf)

try:
return any(
len(page.get_textpage().get_text_bounded().strip()) > 0 for page in pdf
)
finally:
if hasattr(pdf, "close"):
pdf.close()


def extract_text_from_pdf(pdf_bytes: bytes) -> list[list[PDFCharData]]:
Expand Down
2 changes: 1 addition & 1 deletion mindee/v1/mindee_http/base_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
API_KEY_DEFAULT = ""

BASE_URL_ENV_NAME = "MINDEE_BASE_URL"
BASE_URL_DEFAULT = "https://api.mindee.net/v1"
BASE_URL_DEFAULT = "https://api.mindee.net"

REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
TIMEOUT_DEFAULT = 120
Expand Down
4 changes: 2 additions & 2 deletions mindee/v1/mindee_http/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def _custom_request(
params["rag"] = "true"

if workflow_id:
url = f"{self.settings.base_url}/workflows/{workflow_id}/{route}"
url = f"{self.settings.base_url}/v1/workflows/{workflow_id}/{route}"
else:
url = f"{self.settings.url_root}/{route}"

Expand Down Expand Up @@ -165,7 +165,7 @@ def document_feedback_req_put(
:param feedback: Feedback object to send.
"""
return requests.put(
f"{self.settings.base_url}/documents/{document_id}/feedback",
f"{self.settings.base_url}/v1/documents/{document_id}/feedback",
headers=self.settings.base_headers,
data=json.dumps(feedback, indent=0),
timeout=self.settings.request_timeout,
Expand Down
2 changes: 1 addition & 1 deletion mindee/v1/mindee_http/mindee_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ def __init__(
self.endpoint_name = endpoint_name
self.account_name = account_name
self.version = version
self.url_root = f"{self.base_url}/products/{self.account_name}/{self.endpoint_name}/v{self.version}"
self.url_root = f"{self.base_url}/v1/products/{self.account_name}/{self.endpoint_name}/v{self.version}"
2 changes: 1 addition & 1 deletion mindee/v1/mindee_http/workflow_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ def __init__(
"You can set this using the "
f"'{API_KEY_ENV_NAME}' environment variable."
)
self.url_root = f"{self.base_url}/workflows/{workflow_id}/executions"
self.url_root = f"{self.base_url}/v1/workflows/{workflow_id}/executions"
10 changes: 4 additions & 6 deletions mindee/v1/parsing/common/ocr/ocr_page.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from mindee.geometry.minmax import get_min_max_y
from mindee.geometry.polygon import is_point_in_polygon_y
from mindee.geometry.polygon_utils import get_centroid
from mindee.parsing.common.string_dict import StringDict
from mindee.v1.parsing.common.ocr.ocr_line import OCRLine
Expand All @@ -25,12 +24,11 @@ def __init__(self, raw_prediction: StringDict) -> None:
@staticmethod
def _are_words_on_same_line(current_word: OCRWord, next_word: OCRWord) -> bool:
    """
    Determine if two words are on the same line.

    The words share a line when the centroid of either word's polygon falls
    within the Y-axis range of the other word's polygon.

    :param current_word: First word to compare.
    :param next_word: Second word to compare.
    :return: True if either centroid is within the other polygon's Y range.
    """
    # Centroid of the current word tested against the next word's polygon.
    current_in_next = next_word.polygon.is_point_in_y(
        get_centroid(current_word.polygon)
    )
    # Centroid of the next word tested against the current word's polygon.
    next_in_current = current_word.polygon.is_point_in_y(
        get_centroid(next_word.polygon)
    )
    # We need to check both to eliminate any issues due to word order.
    return current_in_next or next_in_current
Expand Down
10 changes: 3 additions & 7 deletions mindee/v1/parsing/standard/date.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from datetime import date, datetime

import pytz

from mindee.parsing.common import StringDict
from mindee.v1.parsing.standard.base import BaseField, FieldPositionMixin

Expand Down Expand Up @@ -44,11 +42,9 @@ def __init__(
self.is_computed = raw_prediction["is_computed"]
if self.value:
try:
self.date_object = (
datetime.strptime(self.value, ISO8601_DATE_FORMAT)
.replace(tzinfo=pytz.utc)
.date()
)
self.date_object = datetime.strptime(
self.value, ISO8601_DATE_FORMAT
).date()
except (TypeError, ValueError):
self.date_object = None
self.confidence = 0.0
Expand Down
7 changes: 0 additions & 7 deletions mindee/v2/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,6 @@ def get_job(self, job_id: str) -> JobResponse:
handle_error_v2(dict_response)
return JobResponse(dict_response)

def get_inference(
self,
inference_id: str,
) -> BaseResponse:
"""[Deprecated] Use `get_result` instead."""
return self.get_result(ExtractionResponse, inference_id)

def get_result(
self,
response_type: type[TypeBaseResponse],
Expand Down
8 changes: 4 additions & 4 deletions mindee/v2/mindee_http/mindee_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
API_KEY_V2_DEFAULT = ""

BASE_URL_ENV_NAME = "MINDEE_V2_BASE_URL"
BASE_URL_DEFAULT = "https://api-v2.mindee.net/v2"
BASE_URL_DEFAULT = "https://api-v2.mindee.net"

REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
TIMEOUT_DEFAULT = 120
Expand Down Expand Up @@ -84,7 +84,7 @@ def req_post_inference_enqueue(
:return: requests response.
"""
data = params.get_form_data()
url = f"{self.url_root}/{slug}/enqueue"
url = f"{self.url_root}/v2/{slug}/enqueue"

if isinstance(input_source, LocalInputSource):
files = {"file": input_source.read_contents(params.close_file)}
Expand Down Expand Up @@ -114,7 +114,7 @@ def req_get_job(self, job_id: str) -> requests.Response:
:param job_id: Job ID, returned by the enqueue request.
"""
return requests.get(
f"{self.url_root}/jobs/{job_id}",
f"{self.url_root}/v2/jobs/{job_id}",
headers=self.base_headers,
timeout=self.request_timeout,
allow_redirects=False,
Expand All @@ -128,7 +128,7 @@ def req_get_inference(self, inference_id: str, slug: str) -> requests.Response:
:param slug: Slug of the inference, defaults to nothing.
"""

url = f"{self.url_root}/{slug}/{inference_id}"
url = f"{self.url_root}/v2/{slug}/{inference_id}"
return requests.get(
url,
headers=self.base_headers,
Expand Down
2 changes: 1 addition & 1 deletion mindee/v2/product/crop/crop_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


class CropItem:
"""Deprecated class. Use CropItem instead."""
"""Result of a cropped document region."""

location: FieldLocation
"""Location which includes cropping coordinates for the detected object, within the source document."""
Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ requires-python = ">=3.10"
dependencies = [
"pypdfium2>=4.0,<6.0",
"Pillow>=12.2.0",
"pytz>=2026.2",
"requests>=2.34.2",
]

Expand All @@ -45,7 +44,6 @@ Changelog = "https://github.com/mindee/mindee-api-python/blob/main/CHANGELOG.md"
lint = [
"pylint==4.0.5",
"pre-commit~=4.6.0",
"types-pytz>=2026.2.0.20260518",
"types-requests>=2.33.0.20260518",
"pip-audit>=2.10.0",
]
Expand Down
Loading
Loading