-
Notifications
You must be signed in to change notification settings - Fork 0
Merge dev changes #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
5ea2340
2dd1a29
ec0b7e1
e455da7
3727096
e0fb809
c82d2a4
efd473b
84c1420
e5106f3
a5c3286
c042ae2
ac57d57
a0564e1
10079b1
8d48612
ddf1687
4a96bc4
98d8aba
e488653
7c52c56
7581a33
82fd69c
ef56632
f316ca8
47ca344
177822b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,8 +4,62 @@ | |
|
|
||
| @author: Eric Lapouyade | ||
| """ | ||
| from xml.sax.saxutils import escape as xml_escape | ||
|
|
||
| from docx.opc.constants import RELATIONSHIP_TYPE as RT | ||
| from docx.oxml import OxmlElement, parse_xml | ||
| from docx.oxml.ns import qn | ||
| from docx.oxml.shape import CT_Inline | ||
| from docx.shared import Emu | ||
|
|
||
|
|
||
| def _build_inline_image_xml_template(): | ||
| """Generate the XML format string by calling python-docx with sentinel values. | ||
|
|
||
| This ensures the template always matches the installed python-docx version's | ||
| XML structure, even after upgrades. We call CT_Inline.new_pic_inline() once | ||
| with recognizable sentinel values, serialize to XML, then replace the | ||
| sentinels with Python format placeholders. | ||
| """ | ||
| import uuid | ||
|
|
||
| # Use GUIDs for string sentinels - guaranteed no collision with XML content | ||
| _RID_SENTINEL = str(uuid.uuid4()) | ||
| _FILENAME_SENTINEL = str(uuid.uuid4()) | ||
|
|
||
| # For numeric sentinels, use unique integers derived from UUIDs. | ||
| # shape_id is xsd:unsignedInt (max 4,294,967,295 / 32-bit). | ||
| # cx/cy are EMU values typed as xsd:long (64-bit). | ||
| # All use 9-digit range [100000000, 999999999] to stay within 32-bit | ||
| # and avoid any accidental collisions with each other. | ||
| _SHAPE_ID = uuid.uuid4().int % (9 * 10**8) + 10**8 | ||
| _CX_INT = uuid.uuid4().int % (9 * 10**8) + 10**8 | ||
| _CY_INT = uuid.uuid4().int % (9 * 10**8) + 10**8 | ||
|
|
||
| inline = CT_Inline.new_pic_inline( | ||
| _SHAPE_ID, | ||
| _RID_SENTINEL, | ||
| _FILENAME_SENTINEL, | ||
| Emu(_CX_INT), | ||
| Emu(_CY_INT), | ||
| ) | ||
| xml = inline.xml | ||
|
|
||
| # Replace sentinel values with format string placeholders | ||
| xml = xml.replace(str(_SHAPE_ID), "{shape_id}") | ||
| xml = xml.replace(_RID_SENTINEL, "{rId}") | ||
| xml = xml.replace(_FILENAME_SENTINEL, "{filename}") | ||
| xml = xml.replace(str(_CX_INT), "{cx}") | ||
| xml = xml.replace(str(_CY_INT), "{cy}") | ||
|
|
||
| return xml | ||
|
|
||
|
|
||
| # Pre-built XML template for inline images, derived from the installed | ||
| # python-docx version. Using str.format() on this template avoids calling | ||
| # CT_Inline.new_pic_inline() per image (which does 2x parse_xml() + | ||
| # element manipulation + .xml serialization each time). | ||
| _INLINE_IMAGE_XML = _build_inline_image_xml_template() | ||
|
|
||
|
|
||
| class InlineImage(object): | ||
|
|
@@ -50,16 +104,59 @@ def _add_hyperlink(self, run, url, part): | |
| return run | ||
|
|
||
| def _insert_image(self): | ||
| pic = self.tpl.current_rendering_part.new_pic_inline( | ||
| self.image_descriptor, | ||
| self.width, | ||
| self.height, | ||
| ).xml | ||
| part = self.tpl.current_rendering_part | ||
| image_descriptor = self.image_descriptor | ||
|
|
||
| # Cache the expensive parts (image part lookup, rId, dimensions) per | ||
| # (part, descriptor, width, height). The XML string itself is NOT | ||
| # cached because each insertion needs a unique shape_id - header/footer | ||
| # and footnote parts are not renumbered by fix_docpr_ids(). | ||
| cache = self.tpl._image_cache | ||
| # For hashable descriptors (strings, paths), cache by value. | ||
| # For unhashable descriptors (file-like objects), skip caching | ||
| # entirely — using id() would risk aliasing after GC. | ||
| try: | ||
| cache_key = (id(part), image_descriptor, self.width, self.height) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Severity: medium 🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage. |
||
| hash(cache_key) is not None # trigger TypeError if unhashable | ||
| except TypeError: | ||
| cache_key = None | ||
|
|
||
| if cache_key is not None and cache_key in cache: | ||
| rId, cx, cy, filename = cache[cache_key] | ||
| else: | ||
| # Get or add the image part with O(1) descriptor-based dedup, | ||
| # avoiding the O(n) linear scan in python-docx's default path. | ||
| image_part, image = self.tpl._get_or_add_image_part(image_descriptor) | ||
| rId = part.relate_to(image_part, RT.IMAGE) | ||
| cx, cy = image.scaled_dimensions(self.width, self.height) | ||
| # Escape for use inside XML attribute (quotes must be escaped). | ||
| # image.filename is None for file-like descriptors (BytesIO); | ||
| # normalize to empty string to match python-docx's behavior. | ||
| filename = xml_escape(image.filename or "", {'"': "&quot;"}) | |
| if cache_key is not None: | ||
| cache[cache_key] = (rId, int(cx), int(cy), filename) | ||
|
|
||
| # Always assign a fresh shape_id per insertion so that drawing IDs | ||
| # are unique in every part (including headers/footers/footnotes | ||
| # which are not renumbered by fix_docpr_ids()). | ||
| self.tpl.docx_ids_index += 1 | ||
| shape_id = self.tpl.docx_ids_index | ||
|
|
||
| # Generate XML directly as a string using a pre-built template | ||
| # rather than calling CT_Inline.new_pic_inline() per image. | ||
| pic = _INLINE_IMAGE_XML.format( | ||
| cx=int(cx), | ||
| cy=int(cy), | ||
| shape_id=shape_id, | ||
| filename=filename, | ||
| rId=rId, | ||
| ) | ||
|
|
||
| if self.anchor: | ||
| run = parse_xml(pic) | ||
| if run.xpath(".//a:blip"): | ||
| hyperlink = self._add_hyperlink( | ||
| run, self.anchor, self.tpl.current_rendering_part | ||
| run, self.anchor, part | ||
| ) | ||
| pic = hyperlink.xml | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The XML template generation relies on plain `.replace()` for the numeric sentinels (`_SHAPE_ID`, `_CX_INT`, `_CY_INT`), so any accidental collision (e.g., the same 9-digit value appearing in another attribute, or two sentinels matching each other) could substitute `{shape_id}`/`{cx}`/`{cy}` in the wrong place and yield invalid image XML.
Severity: low
🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.