From f6758a6e7836252b4b2220c521aeadb2de31806c Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 16:31:26 -0700 Subject: [PATCH 01/10] Hoist function-level imports to module scope in unit tests Move imports that were nested inside test functions/methods up to the top of their modules, and let usort normalize the import blocks. - test_camm_parser: lift mp4_sample_parser into the mapillary_tools.mp4 import - test_exifread: hoist ExifReadFromEXIF/ExifReadFromXMP/XMP_NAMESPACES, ExifToolRead, ExifToolReadVideo and xml.etree; alias the two clashing EXIFTOOL_NAMESPACES as EXIFTOOL_READ_NAMESPACES / EXIFTOOL_READ_VIDEO_NAMESPACES - test_geo: hoist math and telemetry CAMMGPSPoint/GPSFix/GPSPoint - test_gpmf_parser: hoist struct - remaining files: usort import-block normalization only --- tests/unit/test_api_v4.py | 1 - tests/unit/test_camm_parser.py | 8 +++-- tests/unit/test_exifread.py | 41 ++++++++++---------------- tests/unit/test_exiftool_read_video.py | 1 - tests/unit/test_geo.py | 13 ++------ tests/unit/test_gpmf_gps_filter.py | 1 - tests/unit/test_gpmf_parser.py | 6 +--- tests/unit/test_gpx_serializer.py | 7 +---- tests/unit/test_history.py | 1 - tests/unit/test_http.py | 1 - tests/unit/test_ipc.py | 1 - tests/unit/test_sample_video.py | 1 - 12 files changed, 25 insertions(+), 57 deletions(-) diff --git a/tests/unit/test_api_v4.py b/tests/unit/test_api_v4.py index c0e25982..07287c79 100644 --- a/tests/unit/test_api_v4.py +++ b/tests/unit/test_api_v4.py @@ -7,7 +7,6 @@ import pytest import requests - from mapillary_tools import api_v4 diff --git a/tests/unit/test_camm_parser.py b/tests/unit/test_camm_parser.py index e4bd12d6..23f274a3 100644 --- a/tests/unit/test_camm_parser.py +++ b/tests/unit/test_camm_parser.py @@ -10,7 +10,11 @@ from mapillary_tools import geo, telemetry, types, uploader from mapillary_tools.camm import camm_builder, camm_parser -from mapillary_tools.mp4 import construct_mp4_parser as cparser, simple_mp4_builder +from mapillary_tools.mp4 import ( + construct_mp4_parser as cparser, + mp4_sample_parser as sample_parser, + simple_mp4_builder, +) def test_filter_points_by_edit_list(): @@ -356,8 +360,6 @@ def test_build_and_parse3(): def test_camm_trak_carries_mvhd_timestamps(): """Verify that creation_time and modification_time from the source video's mvhd are carried into the CAMM track's tkhd and mdhd boxes.""" - from mapillary_tools.mp4 import mp4_sample_parser as sample_parser - movie_timescale = 1_000_000 src_creation_time = 3_692_845_200 # 2021-01-01 in MP4 epoch src_modification_time = 3_692_845_300 diff --git a/tests/unit/test_exifread.py b/tests/unit/test_exifread.py index 8e7d51c4..8348cbb2 100644 --- a/tests/unit/test_exifread.py +++ b/tests/unit/test_exifread.py @@ -6,6 +6,7 @@ import datetime import os import typing as T +import xml.etree.ElementTree as ET from pathlib import Path import py.path @@ -14,9 +15,20 @@ from mapillary_tools.exif_read import ( _parse_coord, ExifRead, + ExifReadFromEXIF, + ExifReadFromXMP, parse_datetimestr_with_subsec_and_offset, + XMP_NAMESPACES, ) from mapillary_tools.exif_write import ExifEdit +from mapillary_tools.exiftool_read import ( + EXIFTOOL_NAMESPACES as EXIFTOOL_READ_NAMESPACES, + ExifToolRead, +) +from mapillary_tools.exiftool_read_video import ( + EXIFTOOL_NAMESPACES as EXIFTOOL_READ_VIDEO_NAMESPACES, + ExifToolReadVideo, +) """Initialize all the neccessary data""" @@ -280,7 +292,6 @@ class TestExtractCameraUuidFromEXIF: def test_body_serial_only(self): """Test with only body serial number present""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -290,7 +301,6 @@ def test_body_serial_only(self): def test_lens_serial_only(self): """Test with only lens serial number present""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -300,7 +310,6 @@ def test_lens_serial_only(self): def test_both_body_and_lens_serial(self): """Test with both body and lens serial numbers present""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -311,7 +320,6 @@ def test_both_body_and_lens_serial(self): def test_no_serial_numbers(self): """Test with no serial numbers present""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = {} @@ -319,7 +327,6 @@ def test_no_serial_numbers(self): def test_generic_serial_fallback(self): """Test fallback to generic EXIF SerialNumber""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -329,7 +336,6 @@ def test_generic_serial_fallback(self): def test_makernote_serial_fallback(self): """Test fallback to MakerNote SerialNumber""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -339,7 +345,6 @@ def test_makernote_serial_fallback(self): def test_body_serial_priority_over_generic(self): """Test that BodySerialNumber takes priority over generic SerialNumber""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -350,7 +355,6 @@ def test_body_serial_priority_over_generic(self): def test_whitespace_stripped(self): """Test that whitespace is stripped from serial numbers""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -361,7 +365,6 @@ def test_whitespace_stripped(self): def test_special_characters_removed(self): """Test that special characters are removed from serial numbers""" - from mapillary_tools.exif_read import ExifReadFromEXIF reader = ExifReadFromEXIF.__new__(ExifReadFromEXIF) reader.tags = { @@ -376,9 +379,6 @@ class TestExtractCameraUuidFromXMP: def _create_xmp_reader(self, tags_dict: dict): """Helper to create an ExifReadFromXMP with mocked tags""" - from mapillary_tools.exif_read import ExifReadFromXMP, XMP_NAMESPACES - import xml.etree.ElementTree as ET - # Build a minimal XMP document rdf_ns = XMP_NAMESPACES["rdf"] xmp_xml = f''' @@ -456,12 +456,6 @@ class TestVideoExtractCameraUuid: def _create_video_exif_reader(self, tags_dict: dict): """Helper to create an ExifToolReadVideo with mocked tags""" - from mapillary_tools.exiftool_read_video import ( - ExifToolReadVideo, - EXIFTOOL_NAMESPACES, - ) - import xml.etree.ElementTree as ET - # Build XML with child elements (not attributes) - this is how ExifTool XML works root = ET.Element( "rdf:RDF", {"xmlns:rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"} @@ -470,8 +464,8 @@ def _create_video_exif_reader(self, tags_dict: dict): # Add child elements for each tag for key, value in tags_dict.items(): prefix, tag_name = key.split(":") - if prefix in EXIFTOOL_NAMESPACES: - full_tag = "{" + EXIFTOOL_NAMESPACES[prefix] + "}" + tag_name + if prefix in EXIFTOOL_READ_VIDEO_NAMESPACES: + full_tag = "{" + EXIFTOOL_READ_VIDEO_NAMESPACES[prefix] + "}" + tag_name child = ET.SubElement(root, full_tag) child.text = value @@ -528,16 +522,13 @@ class TestExifToolReadExtractCameraUuid: def _create_exiftool_reader(self, tags_dict: dict): """Helper to create an ExifToolRead with mocked tags""" - from mapillary_tools.exiftool_read import ExifToolRead, EXIFTOOL_NAMESPACES - import xml.etree.ElementTree as ET - # Build XML structure that ExifToolRead expects root = ET.Element("rdf:Description") for tag, value in tags_dict.items(): prefix, tag_name = tag.split(":", 1) - if prefix in EXIFTOOL_NAMESPACES: - full_tag = "{" + EXIFTOOL_NAMESPACES[prefix] + "}" + tag_name + if prefix in EXIFTOOL_READ_NAMESPACES: + full_tag = "{" + EXIFTOOL_READ_NAMESPACES[prefix] + "}" + tag_name child = ET.SubElement(root, full_tag) child.text = value diff --git a/tests/unit/test_exiftool_read_video.py b/tests/unit/test_exiftool_read_video.py index ced1c502..ca585d18 100644 --- a/tests/unit/test_exiftool_read_video.py +++ b/tests/unit/test_exiftool_read_video.py @@ -8,7 +8,6 @@ import xml.etree.ElementTree as ET import pytest - from mapillary_tools.exiftool_read_video import ( _aggregate_gps_track, _aggregate_gps_track_by_sample_time, diff --git a/tests/unit/test_geo.py b/tests/unit/test_geo.py index 27790ad8..bd3a785e 100644 --- a/tests/unit/test_geo.py +++ b/tests/unit/test_geo.py @@ -5,12 +5,14 @@ import dataclasses import datetime +import math import random import typing as T import unittest from mapillary_tools import geo from mapillary_tools.geo import Point +from mapillary_tools.telemetry import CAMMGPSPoint, GPSFix, GPSPoint # lat, lon, bearing, alt @@ -778,7 +780,6 @@ def test_avg_speed_with_base_points(self): def test_avg_speed_with_gps_points_using_epoch_time(self): """Test avg_speed with GPSPoint using epoch_time field.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix # Video time is 0-10 seconds, but GPS epoch time spans 100 seconds # This simulates timelapse where video time != GPS time @@ -814,7 +815,6 @@ def test_avg_speed_with_gps_points_using_epoch_time(self): def test_avg_speed_with_gps_points_fallback_to_time(self): """Test avg_speed with GPSPoint falls back to time when epoch_time is None.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix points = [ GPSPoint( @@ -847,7 +847,6 @@ def test_avg_speed_with_gps_points_fallback_to_time(self): def test_avg_speed_with_gps_points_zero_epoch_time_fallback(self): """Test avg_speed falls back to time when epoch_time is 0.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix points = [ GPSPoint( @@ -879,7 +878,6 @@ def test_avg_speed_with_gps_points_zero_epoch_time_fallback(self): def test_avg_speed_with_camm_gps_points(self): """Test avg_speed with CAMMGPSPoint using time_gps_epoch field.""" - from mapillary_tools.telemetry import CAMMGPSPoint # Video time is 0-10 seconds, but GPS epoch time spans 50 seconds points = [ @@ -922,7 +920,6 @@ def test_avg_speed_with_camm_gps_points(self): def test_avg_speed_with_camm_gps_points_zero_epoch_fallback(self): """Test avg_speed with CAMMGPSPoint falls back when time_gps_epoch is 0.""" - from mapillary_tools.telemetry import CAMMGPSPoint points = [ CAMMGPSPoint( @@ -973,7 +970,6 @@ def test_avg_speed_single_point(self): def test_avg_speed_zero_time_diff_returns_nan(self): """Test avg_speed returns NaN when time difference is zero.""" - import math # Two points at the same timestamp points = [ @@ -989,7 +985,6 @@ class TestInterpolatePreservesPointType(unittest.TestCase): def test_interpolate_gps_points_returns_gps_point(self): """Test that interpolating GPSPoints returns a GPSPoint.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix points = [ GPSPoint( @@ -1035,7 +1030,6 @@ def test_interpolate_gps_points_returns_gps_point(self): def test_interpolate_gps_points_with_none_epoch_time(self): """Test interpolating GPSPoints when epoch_time is None.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix points = [ GPSPoint( @@ -1071,7 +1065,6 @@ def test_interpolate_gps_points_with_none_epoch_time(self): def test_interpolate_camm_gps_points_returns_camm_gps_point(self): """Test that interpolating CAMMGPSPoints returns a CAMMGPSPoint.""" - from mapillary_tools.telemetry import CAMMGPSPoint points = [ CAMMGPSPoint( @@ -1145,7 +1138,6 @@ def test_interpolate_base_points_returns_base_point(self): def test_interpolator_preserves_gps_point_type(self): """Test that Interpolator preserves GPSPoint type.""" - from mapillary_tools.telemetry import GPSPoint, GPSFix track = [ GPSPoint( @@ -1180,7 +1172,6 @@ def test_interpolator_preserves_gps_point_type(self): def test_interpolator_preserves_camm_gps_point_type(self): """Test that Interpolator preserves CAMMGPSPoint type.""" - from mapillary_tools.telemetry import CAMMGPSPoint track = [ CAMMGPSPoint( diff --git a/tests/unit/test_gpmf_gps_filter.py b/tests/unit/test_gpmf_gps_filter.py index c86b5c76..c15ef40e 100644 --- a/tests/unit/test_gpmf_gps_filter.py +++ b/tests/unit/test_gpmf_gps_filter.py @@ -8,7 +8,6 @@ import statistics import pytest - from mapillary_tools.geo import Point from mapillary_tools.gpmf import gps_filter from mapillary_tools.gpmf.gpmf_gps_filter import remove_noisy_points, remove_outliers diff --git a/tests/unit/test_gpmf_parser.py b/tests/unit/test_gpmf_parser.py index bae30ce1..f8ac7e94 100644 --- a/tests/unit/test_gpmf_parser.py +++ b/tests/unit/test_gpmf_parser.py @@ -5,10 +5,10 @@ import datetime import os +import struct from pathlib import Path import pytest - from mapillary_tools import telemetry from mapillary_tools.gpmf import gpmf_parser @@ -331,8 +331,6 @@ def _build_gps9_sample_bytes( self, lat, lon, alt, speed2d, speed3d, days, secs_ms, dop, fix ): """Encode raw GPS9 values as bytes using the 'lllllllSS' format.""" - import struct - return struct.pack( ">iiiiiiiHH", lat, @@ -566,8 +564,6 @@ def test_no_strm_key(self): def test_gps9_preferred_over_gps5(self): """GPS9 is tried first within each STRM; GPS5 is fallback.""" - import struct - sample_bytes = struct.pack( ">iiiiiiiHH", 510776007, diff --git a/tests/unit/test_gpx_serializer.py b/tests/unit/test_gpx_serializer.py index 7744d548..3a52cad5 100644 --- a/tests/unit/test_gpx_serializer.py +++ b/tests/unit/test_gpx_serializer.py @@ -11,12 +11,7 @@ from mapillary_tools.geo import Point from mapillary_tools.serializer.gpx import GPXSerializer from mapillary_tools.telemetry import CAMMGPSPoint, GPSFix, GPSPoint -from mapillary_tools.types import ( - ErrorMetadata, - FileType, - ImageMetadata, - VideoMetadata, -) +from mapillary_tools.types import ErrorMetadata, FileType, ImageMetadata, VideoMetadata def _make_image( diff --git a/tests/unit/test_history.py b/tests/unit/test_history.py index a64f5aaf..dd078376 100644 --- a/tests/unit/test_history.py +++ b/tests/unit/test_history.py @@ -9,7 +9,6 @@ from unittest.mock import patch import pytest - from mapillary_tools import history, types diff --git a/tests/unit/test_http.py b/tests/unit/test_http.py index 9e5b5c42..f2367673 100644 --- a/tests/unit/test_http.py +++ b/tests/unit/test_http.py @@ -6,7 +6,6 @@ from unittest.mock import MagicMock import requests - from mapillary_tools import http diff --git a/tests/unit/test_ipc.py b/tests/unit/test_ipc.py index 190421de..37458e6e 100644 --- a/tests/unit/test_ipc.py +++ b/tests/unit/test_ipc.py @@ -8,7 +8,6 @@ from unittest.mock import patch import pytest - from mapillary_tools import ipc diff --git a/tests/unit/test_sample_video.py b/tests/unit/test_sample_video.py index e92aef4a..0743eeb4 100644 --- a/tests/unit/test_sample_video.py +++ b/tests/unit/test_sample_video.py @@ -15,7 +15,6 @@ import py.path import pytest - from mapillary_tools import ( exceptions, exif_read, From 28d46d0df8aedd345d51016d31b1f07ff936000c Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:06:29 -0700 Subject: [PATCH 02/10] Add unit tests for exif_read and exif_write coverage gaps Raises coverage of exif_read.py 64%->84% and exif_write.py 71%->87%. test_exifread.py: - TestExifReadFromXMPMetadata: the ExifReadFromXMP metadata extractors (altitude, lon/lat incl. Adobe format, make/model, width/height incl. GPano fallbacks, orientation, direction, EXIF/GPS datetime, capture_time) - TestExtractXmpEfficiently: the JPEG APP1/XMP segment scanner, incl. no-SOI, no-XMP, and non-XMP-segment-skipped cases - TestExifReadXmpFallback: the ExifRead EXIF->XMP fallback path driven through a real BytesIO stream test_exifedit.py: - add_make/add_model round-trip and empty-string ValueError - add_orientation out-of-range ValueError - write() on bytes-backed edit without filename raises - _safe_dump recovery: untrusted wrong-type tag stripped, trusted wrong-type tag re-raised, AsShotNeutral workaround (issue #662) --- tests/unit/test_exifedit.py | 58 ++++++++ tests/unit/test_exifread.py | 270 ++++++++++++++++++++++++++++++++++++ 2 files changed, 328 insertions(+) diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 571b41ef..4cb408aa 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -216,6 +216,64 @@ def test_add_negative_lat_lon(self): assert (test_longitude, test_latitude) == exif_data.extract_lon_lat() + def test_add_make_and_model(self): + empty_exifedit = ExifEdit(EMPTY_EXIF_FILE_TEST) + empty_exifedit.add_make("Canon") + empty_exifedit.add_model("EOS 5D") + empty_exifedit.write(EMPTY_EXIF_FILE_TEST) + + exif_data = ExifRead(EMPTY_EXIF_FILE_TEST) + self.assertEqual("Canon", exif_data.extract_make()) + self.assertEqual("EOS 5D", exif_data.extract_model()) + + def test_add_make_empty_raises(self): + empty_exifedit = ExifEdit(EMPTY_EXIF_FILE_TEST) + with self.assertRaises(ValueError): + empty_exifedit.add_make("") + + def test_add_model_empty_raises(self): + empty_exifedit = ExifEdit(EMPTY_EXIF_FILE_TEST) + with self.assertRaises(ValueError): + empty_exifedit.add_model("") + + def test_add_orientation_invalid_raises(self): + empty_exifedit = ExifEdit(EMPTY_EXIF_FILE_TEST) + with self.assertRaises(ValueError): + empty_exifedit.add_orientation(99) + + def test_write_bytes_without_filename_raises(self): + with open(EMPTY_EXIF_FILE_TEST, "rb") as fp: + edit = ExifEdit(fp.read()) + edit.add_orientation(1) + # The source is raw bytes, so write() has no filename to fall back on. + with self.assertRaises(ValueError): + edit.write() + + def test_safe_dump_strips_untrusted_wrong_type_tag(self): + """An untrusted tag with a wrong value type is dropped, then dump succeeds.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + # Software (0x0131) is not a trusted tag; an int is the wrong type for it. + edit._ef["0th"][piexif.ImageIFD.Software] = 123 + image_bytes = edit.dump_image_bytes() + self.assertGreater(len(image_bytes), 0) + self.assertNotIn(piexif.ImageIFD.Software, edit._ef["0th"]) + + def test_safe_dump_reraises_trusted_wrong_type_tag(self): + """A trusted tag with a wrong value type must not be silently dropped.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + # DateTimeOriginal is a trusted tag; an int is the wrong type for it. + edit._ef["Exif"][piexif.ExifIFD.DateTimeOriginal] = 12345 + with self.assertRaises(ValueError): + edit.dump_image_bytes() + + def test_safe_dump_removes_as_shot_neutral(self): + """The AsShotNeutral workaround (issue #662) drops the tag and retries.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + edit._ef["0th"][piexif.ImageIFD.AsShotNeutral] = "bad" + image_bytes = edit.dump_image_bytes() + self.assertGreater(len(image_bytes), 0) + self.assertNotIn(piexif.ImageIFD.AsShotNeutral, edit._ef["0th"]) + # REPEAT CERTAIN TESTS AND ADD ADDITIONAL TESTS FOR THE CORRUPT EXIF def test_load_and_dump_corrupt_exif(self): corrupt_exifedit = ExifEdit(CORRUPT_EXIF_FILE) diff --git a/tests/unit/test_exifread.py b/tests/unit/test_exifread.py index 8348cbb2..d285f0c1 100644 --- a/tests/unit/test_exifread.py +++ b/tests/unit/test_exifread.py @@ -4,7 +4,9 @@ # LICENSE file in the root directory of this source tree. import datetime +import io import os +import struct import typing as T import xml.etree.ElementTree as ET from pathlib import Path @@ -17,6 +19,7 @@ ExifRead, ExifReadFromEXIF, ExifReadFromXMP, + extract_xmp_efficiently, parse_datetimestr_with_subsec_and_offset, XMP_NAMESPACES, ) @@ -618,3 +621,270 @@ def test_whitespace_stripped(self): } ) assert reader.extract_camera_uuid() == "BODY123_LENS456" + + +def _build_xmp_doc(tags: T.Dict[str, str]) -> str: + """Build an XMP packet whose rdf:Description carries ``tags`` as attributes.""" + rdf_ns = XMP_NAMESPACES["rdf"] + xml = ( + '' + '' + f'' + " ExifReadFromXMP: + return ExifReadFromXMP(ET.ElementTree(ET.fromstring(_build_xmp_doc(tags)))) + + +def _build_jpeg_with_xmp(xmp_xml: str) -> bytes: + """Build a minimal JPEG containing ``xmp_xml`` in an APP1 XMP segment.""" + identifier = b"http://ns.adobe.com/xap/1.0/\x00" + payload = identifier + xmp_xml.encode("utf-8") + # APP1 length field counts itself (2 bytes) plus the payload + app1 = b"\xff\xe1" + struct.pack(">H", len(payload) + 2) + payload + return b"\xff\xd8" + app1 + b"\xff\xd9" # SOI ... EOI + + +class TestExifReadFromXMPMetadata: + """Exercise the metadata extractors of ExifReadFromXMP (not just camera_uuid).""" + + def test_extract_altitude(self): + assert ( + _make_xmp_reader({"exif:GPSAltitude": "123.5"}).extract_altitude() == 123.5 + ) + + def test_extract_altitude_missing(self): + assert _make_xmp_reader({}).extract_altitude() is None + + def test_extract_lon_lat_numeric(self): + reader = _make_xmp_reader( + { + "exif:GPSLatitude": "50.5", + "exif:GPSLatitudeRef": "N", + "exif:GPSLongitude": "15.5", + "exif:GPSLongitudeRef": "E", + } + ) + assert reader.extract_lon_lat() == (15.5, 50.5) + + def test_extract_lon_lat_adobe_format(self): + reader = _make_xmp_reader( + { + "exif:GPSLatitude": "33,18.32N", + "exif:GPSLatitudeRef": "N", + "exif:GPSLongitude": "44,24.54E", + "exif:GPSLongitudeRef": "E", + } + ) + lonlat = reader.extract_lon_lat() + assert lonlat is not None + lon, lat = lonlat + assert lat == pytest.approx(33.30533, abs=1e-4) + assert lon == pytest.approx(44.40900, abs=1e-4) + + def test_extract_lon_lat_missing(self): + assert _make_xmp_reader({}).extract_lon_lat() is None + + def test_extract_make_and_model_stripped(self): + reader = _make_xmp_reader({"tiff:Make": "Canon ", "tiff:Model": " EOS "}) + assert reader.extract_make() == "Canon" + assert reader.extract_model() == "EOS" + + def test_extract_make_lens_fallback(self): + assert ( + _make_xmp_reader({"exifEX:LensMake": "LensCo"}).extract_make() == "LensCo" + ) + + def test_extract_make_missing(self): + assert _make_xmp_reader({}).extract_make() is None + assert _make_xmp_reader({}).extract_model() is None + + def test_extract_width_height(self): + reader = _make_xmp_reader( + {"exif:PixelXDimension": "1920", "exif:PixelYDimension": "1080"} + ) + assert reader.extract_width() == 1920 + assert reader.extract_height() == 1080 + + def test_extract_width_height_gpano_fallback(self): + assert ( + _make_xmp_reader({"GPano:FullPanoWidthPixels": "4096"}).extract_width() + == 4096 + ) + assert ( + _make_xmp_reader( + {"GPano:CroppedAreaImageHeightPixels": "2048"} + ).extract_height() + == 2048 + ) + + def test_extract_orientation(self): + assert _make_xmp_reader({"tiff:Orientation": "3"}).extract_orientation() == 3 + + def test_extract_orientation_invalid_defaults_to_1(self): + assert _make_xmp_reader({"tiff:Orientation": "99"}).extract_orientation() == 1 + + def test_extract_orientation_missing_defaults_to_1(self): + assert _make_xmp_reader({}).extract_orientation() == 1 + + def test_extract_direction(self): + assert ( + _make_xmp_reader({"exif:GPSImgDirection": "180.5"}).extract_direction() + == 180.5 + ) + + def test_extract_direction_track_fallback(self): + assert _make_xmp_reader({"exif:GPSTrack": "90.0"}).extract_direction() == 90.0 + + def test_extract_direction_missing(self): + assert _make_xmp_reader({}).extract_direction() is None + + def test_extract_exif_datetime(self): + reader = _make_xmp_reader({"exif:DateTimeOriginal": "2021:07:15 15:37:30"}) + assert reader.extract_exif_datetime() == datetime.datetime( + 2021, 7, 15, 15, 37, 30 + ) + + def test_extract_exif_datetime_digitized_fallback(self): + reader = _make_xmp_reader({"exif:DateTimeDigitized": "2020:01:02 03:04:05"}) + assert reader.extract_exif_datetime() == datetime.datetime(2020, 1, 2, 3, 4, 5) + + def test_extract_exif_datetime_missing(self): + assert _make_xmp_reader({}).extract_exif_datetime() is None + + def test_extract_gps_datetime_iso(self): + reader = _make_xmp_reader({"exif:GPSTimeStamp": "2021-07-15T05:37:30Z"}) + assert reader.extract_gps_datetime() == datetime.datetime( + 2021, 7, 15, 5, 37, 30, tzinfo=datetime.timezone.utc + ) + + def test_extract_gps_datetime_separate_date_and_time(self): + reader = _make_xmp_reader( + { + "exif:GPSDateStamp": "2021:07:15", + "exif:GPSTimeStamp": "05:37:30", + } + ) + assert reader.extract_gps_datetime() == datetime.datetime( + 2021, 7, 15, 5, 37, 30, tzinfo=datetime.timezone.utc + ) + + def test_extract_gps_datetime_missing(self): + assert _make_xmp_reader({}).extract_gps_datetime() is None + + def test_extract_capture_time_prefers_gps(self): + reader = _make_xmp_reader( + { + "exif:GPSTimeStamp": "2021-07-15T05:37:30Z", + "exif:DateTimeOriginal": "2000:01:01 00:00:00", + } + ) + assert reader.extract_capture_time() == datetime.datetime( + 2021, 7, 15, 5, 37, 30, tzinfo=datetime.timezone.utc + ) + + def test_extract_capture_time_falls_back_to_exif(self): + reader = _make_xmp_reader({"exif:DateTimeOriginal": "2021:07:15 15:37:30"}) + assert reader.extract_capture_time() == datetime.datetime( + 2021, 7, 15, 15, 37, 30 + ) + + def test_extract_capture_time_missing(self): + assert _make_xmp_reader({}).extract_capture_time() is None + + +class TestExtractXmpEfficiently: + """Cover the JPEG APP1/XMP segment scanner.""" + + def test_returns_xmp_when_present(self): + xmp = _build_xmp_doc({"tiff:Make": "Canon"}) + result = extract_xmp_efficiently(io.BytesIO(_build_jpeg_with_xmp(xmp))) + assert result is not None + assert "" in result + + def test_returns_none_without_soi(self): + assert extract_xmp_efficiently(io.BytesIO(b"not a jpeg")) is None + + def test_returns_none_when_no_xmp_segment(self): + # SOI immediately followed by EOI: valid JPEG start, no APP1/XMP + assert extract_xmp_efficiently(io.BytesIO(b"\xff\xd8\xff\xd9")) is None + + def test_skips_non_xmp_app1_segment(self): + # An APP1 segment that is not XMP (e.g. an EXIF identifier) is skipped, + # and the following XMP APP1 segment is still found. + exif_id = b"Exif\x00\x00rest-of-exif" + exif_app1 = b"\xff\xe1" + struct.pack(">H", len(exif_id) + 2) + exif_id + xmp_app1 = _build_jpeg_with_xmp(_build_xmp_doc({"tiff:Make": "Canon"}))[2:] + data = b"\xff\xd8" + exif_app1 + xmp_app1 + result = extract_xmp_efficiently(io.BytesIO(data)) + assert result is not None + assert "Canon" in result + + +class TestExifReadXmpFallback: + """ExifRead should fall back to XMP when EXIF lacks a field. + + The JPEG built here has no parseable EXIF, so every extractor falls through + to the XMP packet embedded in the APP1 segment. + """ + + def _make_reader(self, tags: T.Dict[str, str]) -> ExifRead: + jpeg = _build_jpeg_with_xmp(_build_xmp_doc(tags)) + return ExifRead(io.BytesIO(jpeg)) + + def test_make_model_fallback(self): + reader = self._make_reader({"tiff:Make": "XMPMake", "tiff:Model": "XMPModel"}) + assert reader.extract_make() == "XMPMake" + assert reader.extract_model() == "XMPModel" + + def test_altitude_fallback(self): + assert ( + self._make_reader({"exif:GPSAltitude": "123.5"}).extract_altitude() == 123.5 + ) + + def test_lon_lat_fallback(self): + reader = self._make_reader( + { + "exif:GPSLatitude": "50.5", + "exif:GPSLatitudeRef": "N", + "exif:GPSLongitude": "15.5", + "exif:GPSLongitudeRef": "E", + } + ) + assert reader.extract_lon_lat() == (15.5, 50.5) + + def test_width_height_fallback(self): + reader = self._make_reader( + {"exif:PixelXDimension": "1920", "exif:PixelYDimension": "1080"} + ) + assert reader.extract_width() == 1920 + assert reader.extract_height() == 1080 + + def test_capture_time_fallback(self): + reader = self._make_reader({"exif:DateTimeOriginal": "2020:01:02 03:04:05"}) + assert reader.extract_capture_time() == datetime.datetime(2020, 1, 2, 3, 4, 5) + + def test_camera_uuid_fallback(self): + reader = self._make_reader( + {"exif:SerialNumber": "BODYX", "exif:LensSerialNumber": "LENSY"} + ) + assert reader.extract_camera_uuid() == "BODYX_LENSY" + + def test_no_xmp_and_no_exif_returns_none(self): + # A JPEG with neither EXIF nor XMP: every extractor returns None. + reader = ExifRead(io.BytesIO(b"\xff\xd8\xff\xd9")) + assert reader.extract_make() is None + assert reader.extract_lon_lat() is None + assert reader.extract_capture_time() is None + assert reader.extract_camera_uuid() is None From 616eaed1da487151af1011ce8688a21a88da8e25 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:33:11 -0700 Subject: [PATCH 03/10] Drop exif_write tests that reach into ExifEdit._ef internals The three _safe_dump recovery tests injected malformed tags via the private ExifEdit._ef dict. There is no public API to trigger those branches, so remove the tests rather than depend on internals. --- tests/unit/test_exifedit.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 4cb408aa..5f9310b1 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -249,31 +249,6 @@ def test_write_bytes_without_filename_raises(self): with self.assertRaises(ValueError): edit.write() - def test_safe_dump_strips_untrusted_wrong_type_tag(self): - """An untrusted tag with a wrong value type is dropped, then dump succeeds.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - # Software (0x0131) is not a trusted tag; an int is the wrong type for it. - edit._ef["0th"][piexif.ImageIFD.Software] = 123 - image_bytes = edit.dump_image_bytes() - self.assertGreater(len(image_bytes), 0) - self.assertNotIn(piexif.ImageIFD.Software, edit._ef["0th"]) - - def test_safe_dump_reraises_trusted_wrong_type_tag(self): - """A trusted tag with a wrong value type must not be silently dropped.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - # DateTimeOriginal is a trusted tag; an int is the wrong type for it. - edit._ef["Exif"][piexif.ExifIFD.DateTimeOriginal] = 12345 - with self.assertRaises(ValueError): - edit.dump_image_bytes() - - def test_safe_dump_removes_as_shot_neutral(self): - """The AsShotNeutral workaround (issue #662) drops the tag and retries.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - edit._ef["0th"][piexif.ImageIFD.AsShotNeutral] = "bad" - image_bytes = edit.dump_image_bytes() - self.assertGreater(len(image_bytes), 0) - self.assertNotIn(piexif.ImageIFD.AsShotNeutral, edit._ef["0th"]) - # REPEAT CERTAIN TESTS AND ADD ADDITIONAL TESTS FOR THE CORRUPT EXIF def test_load_and_dump_corrupt_exif(self): corrupt_exifedit = ExifEdit(CORRUPT_EXIF_FILE) From 8306b6e9f6c16067b724fb48c97a6319bd104c3a Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:35:34 -0700 Subject: [PATCH 04/10] Revert "Drop exif_write tests that reach into ExifEdit._ef internals" This reverts commit 616eaed1da487151af1011ce8688a21a88da8e25. --- tests/unit/test_exifedit.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 5f9310b1..4cb408aa 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -249,6 +249,31 @@ def test_write_bytes_without_filename_raises(self): with self.assertRaises(ValueError): edit.write() + def test_safe_dump_strips_untrusted_wrong_type_tag(self): + """An untrusted tag with a wrong value type is dropped, then dump succeeds.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + # Software (0x0131) is not a trusted tag; an int is the wrong type for it. + edit._ef["0th"][piexif.ImageIFD.Software] = 123 + image_bytes = edit.dump_image_bytes() + self.assertGreater(len(image_bytes), 0) + self.assertNotIn(piexif.ImageIFD.Software, edit._ef["0th"]) + + def test_safe_dump_reraises_trusted_wrong_type_tag(self): + """A trusted tag with a wrong value type must not be silently dropped.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + # DateTimeOriginal is a trusted tag; an int is the wrong type for it. + edit._ef["Exif"][piexif.ExifIFD.DateTimeOriginal] = 12345 + with self.assertRaises(ValueError): + edit.dump_image_bytes() + + def test_safe_dump_removes_as_shot_neutral(self): + """The AsShotNeutral workaround (issue #662) drops the tag and retries.""" + edit = ExifEdit(EMPTY_EXIF_FILE_TEST) + edit._ef["0th"][piexif.ImageIFD.AsShotNeutral] = "bad" + image_bytes = edit.dump_image_bytes() + self.assertGreater(len(image_bytes), 0) + self.assertNotIn(piexif.ImageIFD.AsShotNeutral, edit._ef["0th"]) + # REPEAT CERTAIN TESTS AND ADD ADDITIONAL TESTS FOR THE CORRUPT EXIF def test_load_and_dump_corrupt_exif(self): corrupt_exifedit = ExifEdit(CORRUPT_EXIF_FILE) From b05408cbb73ea3ded25e5ff73572fe2e1fe41fc0 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:46:37 -0700 Subject: [PATCH 05/10] Cover _safe_dump recovery via committed fixtures instead of _ef Replace the two ExifEdit._safe_dump tests that injected malformed tags through the private _ef dict with file-based equivalents driven by real JPEG fixtures whose EXIF piexif can load but cannot re-dump. - tests/unit/generate_corrupt_exif_image.py: hand-assembles the EXIF (PIL + raw TIFF bytes), since neither PIL nor piexif.dump can emit a malformed-but-loadable tag. - tests/data/corrupt_exif_wrong_type.jpg: Software (non-trusted) stored as SHORT -> _safe_dump strips it and retries. - tests/data/corrupt_exif_trusted_wrong_type.jpg: ImageDescription (trusted) stored as SHORT -> _safe_dump re-raises. The AsShotNeutral workaround branch is dropped: it can't be reproduced from a file (piexif decodes it to a dumpable value) without _ef access. --- .../data/corrupt_exif_trusted_wrong_type.jpg | Bin 0 -> 663 bytes tests/data/corrupt_exif_wrong_type.jpg | Bin 0 -> 663 bytes tests/unit/generate_corrupt_exif_image.py | 82 ++++++++++++++++++ tests/unit/test_exifedit.py | 39 +++++---- 4 files changed, 104 insertions(+), 17 deletions(-) create mode 100644 tests/data/corrupt_exif_trusted_wrong_type.jpg create mode 100644 tests/data/corrupt_exif_wrong_type.jpg create mode 100644 tests/unit/generate_corrupt_exif_image.py diff --git a/tests/data/corrupt_exif_trusted_wrong_type.jpg b/tests/data/corrupt_exif_trusted_wrong_type.jpg new file mode 100644 index 0000000000000000000000000000000000000000..33616936442ebf932274dacbf10e0870fdfd74a7 GIT binary patch literal 663 zcmex=EDnVDFaSy@?FfU>nfc?K3iRv|@0M>gTWM0TY@5u?V5 z3ptdXHXalWy7)oGIH{I3zSIJR&kG zIVCkMJtH%#xTLhKyrQzIxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l z&RvHNA31vL_=%IJE?vHI_1g6tH*Yval)|vI#i`vL_Y_D;YI%h&WALxbYyTvT@J{ z(WIh_Tw*FF4^=;cyax6eaUN?T%V%(pA^dfVfrpt97Aj!Z`4a6Yu|2Bg&0|y%$I~ywpJ3Bik zCkGdg2rmyeH;<%{Fu#bbl)Rj*l#Gmmik`ZHl8&;BjE1?Uj)9?xiHW?rrLBdLjh?ZI z5y%imPEJl9ZXO9EDnVDFaSy@?FfU>nfc?K3iRv|@0M>gTWM0TY@5u?V5 z3ptdXHXalWy7)oGIH{I3zSIJR&kG zIVCkMJtH%#xTLhKyrQzIxuvzOy`!^h(&Q;qr%j(RbJn88OO`HMzGCI7O`ErD-L`$l z&RvHNA31vL_=%IJE?vHI_1g6tH*Yval)|vI#i`vL_Y_D;YI%h&WALxbYyTvT@J{ z(WIh_Tw*FF4^=;cyax6eaUN?T%V%(pA^dfVfrpt97 dump succeeds. +- ``corrupt_exif_trusted_wrong_type.jpg`` uses ImageDescription (0x010E), a + *trusted* tag, so _safe_dump must re-raise instead of silently dropping it. + +Neither PIL nor ``piexif.dump`` can produce such files (both normalize or +reject the malformed value), so the EXIF block is assembled by hand. + +Run from the repo root to regenerate the committed fixtures: + + uv run python tests/unit/generate_corrupt_exif_image.py +""" + +from __future__ import annotations + +import io +import struct +from pathlib import Path + +import piexif +from PIL import Image + +_TYPE_SHORT = 3 # write the value as a SHORT regardless of the tag's real type + +# (filename, IFD0 tag id) for each fixture. +_SOFTWARE_TAG = 0x0131 # non-trusted ASCII tag -> _safe_dump strips and retries +_IMAGE_DESCRIPTION_TAG = 0x010E # trusted ASCII tag -> _safe_dump re-raises + + +def _build_one_entry_tiff(tag: int) -> bytes: + """Little-endian TIFF with one IFD0 entry: ``tag`` stored as a SHORT.""" + header = b"II" + struct.pack(" bytes: + """Return JPEG bytes with a loadable-but-undumpable EXIF block for ``tag``.""" + base = Image.new("RGB", (32, 32), "green") + base_buf = io.BytesIO() + base.save(base_buf, "JPEG") + + exif_bytes = b"Exif\x00\x00" + _build_one_entry_tiff(tag) + out = io.BytesIO() + piexif.insert(exif_bytes, base_buf.getvalue(), out) + return out.getvalue() + + +def main() -> None: + # tests/unit/ -> tests/ -> tests/data + data_dir = Path(__file__).resolve().parents[1] / "data" + fixtures = { + "corrupt_exif_wrong_type.jpg": _SOFTWARE_TAG, + "corrupt_exif_trusted_wrong_type.jpg": _IMAGE_DESCRIPTION_TAG, + } + for filename, tag in fixtures.items(): + out_path = data_dir / filename + out_path.write_bytes(build_wrong_type_exif_jpeg(tag)) + print(f"Wrote {out_path}") + + +if __name__ == "__main__": + main() diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 4cb408aa..09c0e1a8 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -27,6 +27,13 @@ CORRUPT_EXIF_FILE_2 = data_dir.joinpath("corrupt_exif_2.jpg") FIXED_EXIF_FILE = data_dir.joinpath("fixed_exif.jpg") FIXED_EXIF_FILE_2 = data_dir.joinpath("fixed_exif_2.jpg") +# JPEGs whose EXIF piexif can load but cannot re-dump (a tag stored with the +# wrong type). See tests/unit/generate_corrupt_exif_image.py. +SHARED_DATA_DIR = this_file_dir.parent.joinpath("data") +UNDUMPABLE_EXIF_FILE = SHARED_DATA_DIR.joinpath("corrupt_exif_wrong_type.jpg") +UNDUMPABLE_TRUSTED_EXIF_FILE = SHARED_DATA_DIR.joinpath( + "corrupt_exif_trusted_wrong_type.jpg" +) def add_image_description_general(_test_obj, filename): @@ -250,30 +257,28 @@ def test_write_bytes_without_filename_raises(self): edit.write() def test_safe_dump_strips_untrusted_wrong_type_tag(self): - """An untrusted tag with a wrong value type is dropped, then dump succeeds.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - # Software (0x0131) is not a trusted tag; an int is the wrong type for it. - edit._ef["0th"][piexif.ImageIFD.Software] = 123 + """A non-trusted tag piexif loads but can't re-dump is stripped, then dump succeeds. + + The fixture stores Software (non-trusted) with a type piexif decodes as + an int but refuses to dump as ASCII; _safe_dump drops it and retries. + """ + edit = ExifEdit(UNDUMPABLE_EXIF_FILE) image_bytes = edit.dump_image_bytes() self.assertGreater(len(image_bytes), 0) - self.assertNotIn(piexif.ImageIFD.Software, edit._ef["0th"]) + # The offending Software tag is gone from the recovered output. + recovered = piexif.load(image_bytes) + self.assertNotIn(piexif.ImageIFD.Software, recovered["0th"]) def test_safe_dump_reraises_trusted_wrong_type_tag(self): - """A trusted tag with a wrong value type must not be silently dropped.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - # DateTimeOriginal is a trusted tag; an int is the wrong type for it. - edit._ef["Exif"][piexif.ExifIFD.DateTimeOriginal] = 12345 + """A trusted tag with a wrong value type must not be silently dropped. + + The fixture stores ImageDescription (trusted) with the wrong type, so + _safe_dump re-raises rather than stripping it. + """ + edit = ExifEdit(UNDUMPABLE_TRUSTED_EXIF_FILE) with self.assertRaises(ValueError): edit.dump_image_bytes() - def test_safe_dump_removes_as_shot_neutral(self): - """The AsShotNeutral workaround (issue #662) drops the tag and retries.""" - edit = ExifEdit(EMPTY_EXIF_FILE_TEST) - edit._ef["0th"][piexif.ImageIFD.AsShotNeutral] = "bad" - image_bytes = edit.dump_image_bytes() - self.assertGreater(len(image_bytes), 0) - self.assertNotIn(piexif.ImageIFD.AsShotNeutral, edit._ef["0th"]) - # REPEAT CERTAIN TESTS AND ADD ADDITIONAL TESTS FOR THE CORRUPT EXIF def test_load_and_dump_corrupt_exif(self): corrupt_exifedit = ExifEdit(CORRUPT_EXIF_FILE) From 59ffd283001580bf7b14d06fa87463a3a03b95c4 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:48:04 -0700 Subject: [PATCH 06/10] Move corrupt-EXIF fixtures to tests/unit/data Relocate the undumpable-EXIF fixtures alongside the other unit-test fixtures and reference them via the test's data_dir. --- .../data/corrupt_exif_trusted_wrong_type.jpg | Bin tests/{ => unit}/data/corrupt_exif_wrong_type.jpg | Bin 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => unit}/data/corrupt_exif_trusted_wrong_type.jpg (100%) rename tests/{ => unit}/data/corrupt_exif_wrong_type.jpg (100%) diff --git a/tests/data/corrupt_exif_trusted_wrong_type.jpg b/tests/unit/data/corrupt_exif_trusted_wrong_type.jpg similarity index 100% rename from tests/data/corrupt_exif_trusted_wrong_type.jpg rename to tests/unit/data/corrupt_exif_trusted_wrong_type.jpg diff --git a/tests/data/corrupt_exif_wrong_type.jpg b/tests/unit/data/corrupt_exif_wrong_type.jpg similarity index 100% rename from tests/data/corrupt_exif_wrong_type.jpg rename to tests/unit/data/corrupt_exif_wrong_type.jpg From 32c487508391a28a3d48897790ebd4accec738e6 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:48:13 -0700 Subject: [PATCH 07/10] Reference relocated corrupt-EXIF fixtures from tests/unit/data Point test_exifedit.py at the fixtures via data_dir and update the generator's output path accordingly. --- tests/unit/generate_corrupt_exif_image.py | 3 +-- tests/unit/test_exifedit.py | 9 +++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/unit/generate_corrupt_exif_image.py b/tests/unit/generate_corrupt_exif_image.py index 8cc6584b..b05414bf 100644 --- a/tests/unit/generate_corrupt_exif_image.py +++ b/tests/unit/generate_corrupt_exif_image.py @@ -66,8 +66,7 @@ def build_wrong_type_exif_jpeg(tag: int) -> bytes: def main() -> None: - # tests/unit/ -> tests/ -> tests/data - data_dir = Path(__file__).resolve().parents[1] / "data" + data_dir = Path(__file__).resolve().parent / "data" fixtures = { "corrupt_exif_wrong_type.jpg": _SOFTWARE_TAG, "corrupt_exif_trusted_wrong_type.jpg": _IMAGE_DESCRIPTION_TAG, diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 09c0e1a8..8b8f1331 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -28,12 +28,9 @@ FIXED_EXIF_FILE = data_dir.joinpath("fixed_exif.jpg") FIXED_EXIF_FILE_2 = data_dir.joinpath("fixed_exif_2.jpg") # JPEGs whose EXIF piexif can load but cannot re-dump (a tag stored with the -# wrong type). See tests/unit/generate_corrupt_exif_image.py. -SHARED_DATA_DIR = this_file_dir.parent.joinpath("data") -UNDUMPABLE_EXIF_FILE = SHARED_DATA_DIR.joinpath("corrupt_exif_wrong_type.jpg") -UNDUMPABLE_TRUSTED_EXIF_FILE = SHARED_DATA_DIR.joinpath( - "corrupt_exif_trusted_wrong_type.jpg" -) +# wrong type), used to exercise the ExifEdit._safe_dump recovery branches. +UNDUMPABLE_EXIF_FILE = data_dir.joinpath("corrupt_exif_wrong_type.jpg") +UNDUMPABLE_TRUSTED_EXIF_FILE = data_dir.joinpath("corrupt_exif_trusted_wrong_type.jpg") def add_image_description_general(_test_obj, filename): From b09d5283534b39aafb5ed21e84af44f6b03912dc Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 17:59:26 -0700 Subject: [PATCH 08/10] Cover large-thumbnail recovery via fixture instead of _ef Replace the last ExifEdit._ef access in test_exifedit.py. The large thumbnail is no longer injected into the private _ef dict; instead a committed fixture carries a thumbnail above piexif's 64000-byte dump limit (yet within a JPEG APP1 segment), so ExifEdit loads it and _safe_dump drops the thumbnail/1st IFD on dump and retries. tests/unit/data/corrupt_exif_large_thumbnail.jpg added; generator extended to build it. --- .../data/corrupt_exif_large_thumbnail.jpg | Bin 0 -> 65361 bytes tests/unit/generate_corrupt_exif_image.py | 75 +++++++++++++++--- tests/unit/test_exifedit.py | 55 +++---------- 3 files changed, 75 insertions(+), 55 deletions(-) create mode 100644 tests/unit/data/corrupt_exif_large_thumbnail.jpg diff --git a/tests/unit/data/corrupt_exif_large_thumbnail.jpg b/tests/unit/data/corrupt_exif_large_thumbnail.jpg new file mode 100644 index 0000000000000000000000000000000000000000..389c4eaccb80e3e8610f75190da4415accff7d2e GIT binary patch literal 65361 zcmeIvNlX-30D$3FTUS+gGg8nXkVfbRq%nj@(+U`k0ThKBxO)R}y@3mA;sM;b;jTAu z_lAnwMG@S%;R<*eaQ6gmh%-q~HH?~gQcp9<|B_c-FYl$&|M$YZ;eo#cb2qPDB}7@- z6k&+S_`{yEkYrUz_FHyj_3!(KzwCuQ!oA@?MRa*-S*eihGg9PW_^BunhQpydG(*>Q z(=?oJPn6r`awo>bPVgirrA$gr@_JL#b23x?*=b&HAU`X6YHlbLO35soRS=w+lNSm` zJ|dZ>>2|pjqM{OlKCdtM%OiY4ScctVD3V{umZVrxxJ#ti|5KC3#(0FimK9ZV=!WTZ z*$3LAg)AwGtSXwOs`lQ0t>Hb9XZ3YF4gVy=LvY^&2*Bs^7e2 z>-HTxckSM@w`t%00|yTsK63O}>+usOPn|w<_T2dk7uzpgzH;^2&Cae{x9{A&*M0xN z!$*&w^!D`+3_g4Q;^nK?Z{7~Sd;j6%r;*R2Um{(SP{yZ7`A83yjvZN@os%V<3+NZbK z`$Dy}7+-FYBetSS_t$!Y4b3e^T5(%PTxHKtdR}#1qv?#Fn2?y1F*!4kH6=7{dVWFS zjFLHXOXtlmD_^j1(c&dbmn~mWvvSqyHEY+c->`90{pKxOx9`}wYxkbLP5bsAIC$vr zk)y|2kDoYs>hzhj=gwca*na8qm8;ioc6Qymedq4I?)wiOK6?D5x37O-@Y(YhFJHZW z^LF^%`wt&KjeH*c66unJGM;5W$IE``vg|HdRTWi_bV+i(JrzsUe7TO8q6)pLHr5|( zFg(T0Eo~iUT3+Q)Ty(-{x Q{Ugz>O#$&go=UjyABBMBasU7T literal 0 HcmV?d00001 diff --git a/tests/unit/generate_corrupt_exif_image.py b/tests/unit/generate_corrupt_exif_image.py index b05414bf..803bfdeb 100644 --- a/tests/unit/generate_corrupt_exif_image.py +++ b/tests/unit/generate_corrupt_exif_image.py @@ -5,18 +5,24 @@ """Generate JPEGs whose EXIF piexif can *load* but cannot *re-dump*. -Each fixture carries a single IFD0 entry whose value is stored with the wrong -TIFF type: an ASCII tag is written as a SHORT, so piexif decodes it as an -``int`` but on dump expects a ``str`` and raises ``"got wrong type of exif -value"``. This drives the recovery logic in ``ExifEdit._safe_dump``: +These fixtures drive the recovery logic in ``ExifEdit._safe_dump``. Neither PIL +nor ``piexif.dump`` can produce them (both normalize or reject the offending +data), so the EXIF blocks are assembled by hand. + +Wrong-type fixtures store an ASCII tag with TIFF type SHORT, so piexif decodes +it as an ``int`` but on dump expects a ``str`` and raises ``"got wrong type of +exif value"``: - ``corrupt_exif_wrong_type.jpg`` uses Software (0x0131), a *non-trusted* tag, so _safe_dump strips it and retries -> dump succeeds. - ``corrupt_exif_trusted_wrong_type.jpg`` uses ImageDescription (0x010E), a *trusted* tag, so _safe_dump must re-raise instead of silently dropping it. -Neither PIL nor ``piexif.dump`` can produce such files (both normalize or -reject the malformed value), so the EXIF block is assembled by hand. +The large-thumbnail fixture embeds a thumbnail above piexif's 64000-byte dump +limit (but still within a single JPEG APP1 segment), so _safe_dump drops the +thumbnail and the 1st IFD and retries: + +- ``corrupt_exif_large_thumbnail.jpg`` Run from the repo root to regenerate the committed fixtures: @@ -34,10 +40,13 @@ _TYPE_SHORT = 3 # write the value as a SHORT regardless of the tag's real type -# (filename, IFD0 tag id) for each fixture. _SOFTWARE_TAG = 0x0131 # non-trusted ASCII tag -> _safe_dump strips and retries _IMAGE_DESCRIPTION_TAG = 0x010E # trusted ASCII tag -> _safe_dump re-raises +# piexif rejects thumbnails larger than 64000 bytes on dump; this size is over +# that limit yet small enough to fit in a JPEG APP1 segment (max 65535 bytes). +_LARGE_THUMBNAIL_SIZE = 64500 + def _build_one_entry_tiff(tag: int) -> bytes: """Little-endian TIFF with one IFD0 entry: ``tag`` stored as a SHORT.""" @@ -65,17 +74,65 @@ def build_wrong_type_exif_jpeg(tag: int) -> bytes: return out.getvalue() +def _make_oversized_thumbnail() -> bytes: + """A deterministic blob over piexif's 64000-byte thumbnail limit.""" + tiny = io.BytesIO() + Image.new("RGB", (8, 8), "blue").save(tiny, "JPEG") + jpeg = tiny.getvalue() + return jpeg + b"\x00" * (_LARGE_THUMBNAIL_SIZE - len(jpeg)) + + +def _build_thumbnail_tiff(thumbnail: bytes) -> bytes: + """Little-endian TIFF with an empty IFD0 pointing to a thumbnail in IFD1.""" + thumb_offset = 44 # bytes consumed by the header + IFD0 + IFD1 below + out = b"II" + struct.pack(" IFD1 at offset 14 + out += struct.pack(" bytes: + """Return JPEG bytes whose EXIF carries a thumbnail too large to re-dump.""" + base = Image.new("RGB", (100, 100), "red") + base_buf = io.BytesIO() + base.save(base_buf, "JPEG") + + exif_bytes = b"Exif\x00\x00" + _build_thumbnail_tiff(_make_oversized_thumbnail()) + out = io.BytesIO() + piexif.insert(exif_bytes, base_buf.getvalue(), out) + return out.getvalue() + + def main() -> None: data_dir = Path(__file__).resolve().parent / "data" - fixtures = { + wrong_type_fixtures = { "corrupt_exif_wrong_type.jpg": _SOFTWARE_TAG, "corrupt_exif_trusted_wrong_type.jpg": _IMAGE_DESCRIPTION_TAG, } - for filename, tag in fixtures.items(): + for filename, tag in wrong_type_fixtures.items(): out_path = data_dir / filename out_path.write_bytes(build_wrong_type_exif_jpeg(tag)) print(f"Wrote {out_path}") + large_thumb_path = data_dir / "corrupt_exif_large_thumbnail.jpg" + large_thumb_path.write_bytes(build_large_thumbnail_exif_jpeg()) + print(f"Wrote {large_thumb_path}") + if __name__ == "__main__": main() diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 8b8f1331..6c4a9167 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -31,6 +31,8 @@ # wrong type), used to exercise the ExifEdit._safe_dump recovery branches. UNDUMPABLE_EXIF_FILE = data_dir.joinpath("corrupt_exif_wrong_type.jpg") UNDUMPABLE_TRUSTED_EXIF_FILE = data_dir.joinpath("corrupt_exif_trusted_wrong_type.jpg") +# A JPEG whose EXIF carries a thumbnail above piexif's 64000-byte dump limit. +LARGE_THUMBNAIL_EXIF_FILE = data_dir.joinpath("corrupt_exif_large_thumbnail.jpg") def add_image_description_general(_test_obj, filename): @@ -328,57 +330,18 @@ def test_add_repeatedly_time_original_corrupt_exif_2(self): add_repeatedly_time_original_general(self, CORRUPT_EXIF_FILE_2) def test_large_thumbnail_handling(self): - """Test that images with thumbnails larger than 64kB are handled gracefully.""" - # Create a test image with a large thumbnail (>64kB) - test_image_path = data_dir.joinpath("tmp", "large_thumbnail.jpg") - - # Create a simple test image - img = Image.new("RGB", (100, 100), color="red") - img.save(test_image_path, "JPEG") - - # Create a large thumbnail (>64kB) by creating a high-quality large thumbnail - # Use a larger size and add noise to make it incompressible - large_thumbnail = Image.new("RGB", (2048, 2048)) - # Fill with random-like data to prevent compression - pixels = large_thumbnail.load() - for i in range(2048): - for j in range(2048): - pixels[i, j] = ( - (i * 7 + j * 13) % 256, - (i * 11 + j * 17) % 256, - (i * 19 + j * 23) % 256, - ) - - thumbnail_bytes = io.BytesIO() - large_thumbnail.save(thumbnail_bytes, "JPEG", quality=100) - thumbnail_data = thumbnail_bytes.getvalue() - - # Verify thumbnail is larger than 64kB - self.assertGreater( - len(thumbnail_data), - 64 * 1024, - f"Test thumbnail should be larger than 64kB but got {len(thumbnail_data)} bytes", - ) + """Test that images with thumbnails larger than 64kB are handled gracefully. - # Load the image and add GPS data first - exif_edit = ExifEdit(test_image_path) + The fixture carries a thumbnail above piexif's 64000-byte dump limit, so + _safe_dump must drop the thumbnail and 1st IFD and retry. GPS data added + through the public API must survive. + """ + exif_edit = ExifEdit(LARGE_THUMBNAIL_EXIF_FILE) test_latitude = 50.5475894785 test_longitude = 15.595866685 exif_edit.add_lat_lon(test_latitude, test_longitude) - # Manually insert the large thumbnail into the internal EXIF structure - # This simulates what would happen if an image came in with a large thumbnail - exif_edit._ef["thumbnail"] = thumbnail_data - exif_edit._ef["1st"] = { - piexif.ImageIFD.Compression: 6, - piexif.ImageIFD.XResolution: (72, 1), - piexif.ImageIFD.YResolution: (72, 1), - piexif.ImageIFD.ResolutionUnit: 2, - piexif.ImageIFD.JPEGInterchangeFormat: 0, - piexif.ImageIFD.JPEGInterchangeFormatLength: len(thumbnail_data), - } - - # Given thumbnail is too large, max 64kB, thumbnail and 1st metadata should be removed. + # Given the thumbnail is too large, it and the 1st IFD should be removed. image_bytes = exif_edit.dump_image_bytes() # Verify the output is valid From 28e6a7ee792c021a8729f66c508858888bf7c7d2 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 18:03:24 -0700 Subject: [PATCH 09/10] Describe behavior, not internals, in exif test comments Reword test docstrings/comments and rename tests so they describe observable behavior (unsavable tags, oversized thumbnails) rather than implementation details (private helpers, library specifics, IFD layout). --- tests/unit/test_exifedit.py | 52 +++++++++++++++---------------------- tests/unit/test_exifread.py | 10 +++---- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/tests/unit/test_exifedit.py b/tests/unit/test_exifedit.py index 6c4a9167..fb4d311a 100644 --- a/tests/unit/test_exifedit.py +++ b/tests/unit/test_exifedit.py @@ -27,11 +27,12 @@ CORRUPT_EXIF_FILE_2 = data_dir.joinpath("corrupt_exif_2.jpg") FIXED_EXIF_FILE = data_dir.joinpath("fixed_exif.jpg") FIXED_EXIF_FILE_2 = data_dir.joinpath("fixed_exif_2.jpg") -# JPEGs whose EXIF piexif can load but cannot re-dump (a tag stored with the -# wrong type), used to exercise the ExifEdit._safe_dump recovery branches. +# JPEGs whose EXIF can be read but cannot be written back out unchanged. +# UNDUMPABLE_EXIF_FILE carries a non-essential tag that cannot be saved; +# UNDUMPABLE_ESSENTIAL_EXIF_FILE carries an essential one. UNDUMPABLE_EXIF_FILE = data_dir.joinpath("corrupt_exif_wrong_type.jpg") -UNDUMPABLE_TRUSTED_EXIF_FILE = data_dir.joinpath("corrupt_exif_trusted_wrong_type.jpg") -# A JPEG whose EXIF carries a thumbnail above piexif's 64000-byte dump limit. +UNDUMPABLE_ESSENTIAL_EXIF_FILE = data_dir.joinpath("corrupt_exif_trusted_wrong_type.jpg") +# A JPEG whose embedded thumbnail is too large to be written back out. LARGE_THUMBNAIL_EXIF_FILE = data_dir.joinpath("corrupt_exif_large_thumbnail.jpg") @@ -255,26 +256,18 @@ def test_write_bytes_without_filename_raises(self): with self.assertRaises(ValueError): edit.write() - def test_safe_dump_strips_untrusted_wrong_type_tag(self): - """A non-trusted tag piexif loads but can't re-dump is stripped, then dump succeeds. - - The fixture stores Software (non-trusted) with a type piexif decodes as - an int but refuses to dump as ASCII; _safe_dump drops it and retries. - """ + def test_unwritable_non_essential_tag_is_dropped(self): + """An image with an unsavable non-essential tag is still saved without it.""" edit = ExifEdit(UNDUMPABLE_EXIF_FILE) image_bytes = edit.dump_image_bytes() self.assertGreater(len(image_bytes), 0) - # The offending Software tag is gone from the recovered output. - recovered = piexif.load(image_bytes) - self.assertNotIn(piexif.ImageIFD.Software, recovered["0th"]) + # The unsavable tag is absent from the output. + saved = piexif.load(image_bytes) + self.assertNotIn(piexif.ImageIFD.Software, saved["0th"]) - def test_safe_dump_reraises_trusted_wrong_type_tag(self): - """A trusted tag with a wrong value type must not be silently dropped. - - The fixture stores ImageDescription (trusted) with the wrong type, so - _safe_dump re-raises rather than stripping it. - """ - edit = ExifEdit(UNDUMPABLE_TRUSTED_EXIF_FILE) + def test_unwritable_essential_tag_raises(self): + """Saving fails if an essential tag cannot be written, rather than dropping it.""" + edit = ExifEdit(UNDUMPABLE_ESSENTIAL_EXIF_FILE) with self.assertRaises(ValueError): edit.dump_image_bytes() @@ -330,18 +323,17 @@ def test_add_repeatedly_time_original_corrupt_exif_2(self): add_repeatedly_time_original_general(self, CORRUPT_EXIF_FILE_2) def test_large_thumbnail_handling(self): - """Test that images with thumbnails larger than 64kB are handled gracefully. + """An oversized embedded thumbnail is dropped on save; other EXIF survives. - The fixture carries a thumbnail above piexif's 64000-byte dump limit, so - _safe_dump must drop the thumbnail and 1st IFD and retry. GPS data added - through the public API must survive. + The image carries a thumbnail too large to be written back out. Saving + should still succeed, and GPS data added through the API is preserved. """ exif_edit = ExifEdit(LARGE_THUMBNAIL_EXIF_FILE) test_latitude = 50.5475894785 test_longitude = 15.595866685 exif_edit.add_lat_lon(test_latitude, test_longitude) - # Given the thumbnail is too large, it and the 1st IFD should be removed. + # The oversized thumbnail should be dropped so the image can be saved. image_bytes = exif_edit.dump_image_bytes() # Verify the output is valid @@ -353,25 +345,23 @@ def test_large_thumbnail_handling(self): self.assertEqual(result_image.format, "JPEG") self.assertEqual(result_image.size, (100, 100)) - # Verify we can read the GPS data from the result + # The GPS data added before saving is still present. output_exif = piexif.load(image_bytes) self.assertIn("GPS", output_exif) self.assertIn(piexif.GPSIFD.GPSLatitude, output_exif["GPS"]) self.assertIn(piexif.GPSIFD.GPSLongitude, output_exif["GPS"]) - # CRITICAL: Verify the large thumbnail was actually removed - # The fix should have deleted both "thumbnail" and "1st" to handle the error - # piexif.load() may include "thumbnail": None after removal + # The oversized thumbnail is no longer present in the saved image. thumbnail_value = output_exif.get("thumbnail") self.assertTrue( thumbnail_value is None or thumbnail_value == b"", - f"Large thumbnail should have been removed but got: {thumbnail_value[:100] if thumbnail_value else None}", + f"thumbnail should have been removed but got: {thumbnail_value[:100] if thumbnail_value else None}", ) first_value = output_exif.get("1st") self.assertTrue( first_value is None or first_value == {} or len(first_value) == 0, - f"1st metadata should have been removed but got: {first_value}", + f"thumbnail metadata should have been removed but got: {first_value}", ) diff --git a/tests/unit/test_exifread.py b/tests/unit/test_exifread.py index d285f0c1..8d1cce8b 100644 --- a/tests/unit/test_exifread.py +++ b/tests/unit/test_exifread.py @@ -656,7 +656,7 @@ def _build_jpeg_with_xmp(xmp_xml: str) -> bytes: class TestExifReadFromXMPMetadata: - """Exercise the metadata extractors of ExifReadFromXMP (not just camera_uuid).""" + """Tests for reading metadata from XMP.""" def test_extract_altitude(self): assert ( @@ -804,7 +804,7 @@ def test_extract_capture_time_missing(self): class TestExtractXmpEfficiently: - """Cover the JPEG APP1/XMP segment scanner.""" + """Tests for locating XMP metadata embedded in a JPEG.""" def test_returns_xmp_when_present(self): xmp = _build_xmp_doc({"tiff:Make": "Canon"}) @@ -833,11 +833,7 @@ def test_skips_non_xmp_app1_segment(self): class TestExifReadXmpFallback: - """ExifRead should fall back to XMP when EXIF lacks a field. - - The JPEG built here has no parseable EXIF, so every extractor falls through - to the XMP packet embedded in the APP1 segment. - """ + """Reading metadata from a JPEG whose values live in XMP, not EXIF.""" def _make_reader(self, tags: T.Dict[str, str]) -> ExifRead: jpeg = _build_jpeg_with_xmp(_build_xmp_doc(tags)) From f7d3c154d0cfefc43e07a4de61df334d45e6c377 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Fri, 5 Jun 2026 18:07:10 -0700 Subject: [PATCH 10/10] Remove corrupt-EXIF fixture generator script The fixtures are committed binaries under tests/unit/data; the generator is no longer needed. --- tests/unit/generate_corrupt_exif_image.py | 138 ---------------------- 1 file changed, 138 deletions(-) delete mode 100644 tests/unit/generate_corrupt_exif_image.py diff --git a/tests/unit/generate_corrupt_exif_image.py b/tests/unit/generate_corrupt_exif_image.py deleted file mode 100644 index 803bfdeb..00000000 --- a/tests/unit/generate_corrupt_exif_image.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# -# This source code is licensed under the BSD license found in the -# LICENSE file in the root directory of this source tree. - -"""Generate JPEGs whose EXIF piexif can *load* but cannot *re-dump*. - -These fixtures drive the recovery logic in ``ExifEdit._safe_dump``. Neither PIL -nor ``piexif.dump`` can produce them (both normalize or reject the offending -data), so the EXIF blocks are assembled by hand. - -Wrong-type fixtures store an ASCII tag with TIFF type SHORT, so piexif decodes -it as an ``int`` but on dump expects a ``str`` and raises ``"got wrong type of -exif value"``: - -- ``corrupt_exif_wrong_type.jpg`` uses Software (0x0131), a *non-trusted* tag, - so _safe_dump strips it and retries -> dump succeeds. -- ``corrupt_exif_trusted_wrong_type.jpg`` uses ImageDescription (0x010E), a - *trusted* tag, so _safe_dump must re-raise instead of silently dropping it. - -The large-thumbnail fixture embeds a thumbnail above piexif's 64000-byte dump -limit (but still within a single JPEG APP1 segment), so _safe_dump drops the -thumbnail and the 1st IFD and retries: - -- ``corrupt_exif_large_thumbnail.jpg`` - -Run from the repo root to regenerate the committed fixtures: - - uv run python tests/unit/generate_corrupt_exif_image.py -""" - -from __future__ import annotations - -import io -import struct -from pathlib import Path - -import piexif -from PIL import Image - -_TYPE_SHORT = 3 # write the value as a SHORT regardless of the tag's real type - -_SOFTWARE_TAG = 0x0131 # non-trusted ASCII tag -> _safe_dump strips and retries -_IMAGE_DESCRIPTION_TAG = 0x010E # trusted ASCII tag -> _safe_dump re-raises - -# piexif rejects thumbnails larger than 64000 bytes on dump; this size is over -# that limit yet small enough to fit in a JPEG APP1 segment (max 65535 bytes). -_LARGE_THUMBNAIL_SIZE = 64500 - - -def _build_one_entry_tiff(tag: int) -> bytes: - """Little-endian TIFF with one IFD0 entry: ``tag`` stored as a SHORT.""" - header = b"II" + struct.pack(" bytes: - """Return JPEG bytes with a loadable-but-undumpable EXIF block for ``tag``.""" - base = Image.new("RGB", (32, 32), "green") - base_buf = io.BytesIO() - base.save(base_buf, "JPEG") - - exif_bytes = b"Exif\x00\x00" + _build_one_entry_tiff(tag) - out = io.BytesIO() - piexif.insert(exif_bytes, base_buf.getvalue(), out) - return out.getvalue() - - -def _make_oversized_thumbnail() -> bytes: - """A deterministic blob over piexif's 64000-byte thumbnail limit.""" - tiny = io.BytesIO() - Image.new("RGB", (8, 8), "blue").save(tiny, "JPEG") - jpeg = tiny.getvalue() - return jpeg + b"\x00" * (_LARGE_THUMBNAIL_SIZE - len(jpeg)) - - -def _build_thumbnail_tiff(thumbnail: bytes) -> bytes: - """Little-endian TIFF with an empty IFD0 pointing to a thumbnail in IFD1.""" - thumb_offset = 44 # bytes consumed by the header + IFD0 + IFD1 below - out = b"II" + struct.pack(" IFD1 at offset 14 - out += struct.pack(" bytes: - """Return JPEG bytes whose EXIF carries a thumbnail too large to re-dump.""" - base = Image.new("RGB", (100, 100), "red") - base_buf = io.BytesIO() - base.save(base_buf, "JPEG") - - exif_bytes = b"Exif\x00\x00" + _build_thumbnail_tiff(_make_oversized_thumbnail()) - out = io.BytesIO() - piexif.insert(exif_bytes, base_buf.getvalue(), out) - return out.getvalue() - - -def main() -> None: - data_dir = Path(__file__).resolve().parent / "data" - wrong_type_fixtures = { - "corrupt_exif_wrong_type.jpg": _SOFTWARE_TAG, - "corrupt_exif_trusted_wrong_type.jpg": _IMAGE_DESCRIPTION_TAG, - } - for filename, tag in wrong_type_fixtures.items(): - out_path = data_dir / filename - out_path.write_bytes(build_wrong_type_exif_jpeg(tag)) - print(f"Wrote {out_path}") - - large_thumb_path = data_dir / "corrupt_exif_large_thumbnail.jpg" - large_thumb_path.write_bytes(build_large_thumbnail_exif_jpeg()) - print(f"Wrote {large_thumb_path}") - - -if __name__ == "__main__": - main()