Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5ea2340
perf: parse_xml + body mutation optimization
Bonggoprasetyanto Dec 24, 2025
2dd1a29
Fix poetry configuration - add required fields
Bonggoprasetyanto Jan 8, 2026
ec0b7e1
fix: improve XML handling and cleanup code
Bonggoprasetyanto Jan 8, 2026
e455da7
Small comment clean-up
JackByrne Jan 9, 2026
3727096
Merge pull request #630 from start-software/develop
JackByrne May 11, 2026
e0fb809
perf: optimize body replacement and header/footer processing in DocxT…
bonggo-pras May 12, 2026
c82d2a4
Remove logging warnings in template.py
JackByrne May 18, 2026
efd473b
Clarify body-swap docstring and comments
JackByrne May 18, 2026
84c1420
Improve header/footer Jinja detection and fallback
JackByrne May 18, 2026
e5106f3
Optimize resolve_listing with early exit
JackByrne May 18, 2026
a5c3286
Precompile tag-stripping regexes in DocxTemplate
JackByrne May 18, 2026
c042ae2
Remove unused imports from template.py
JackByrne May 18, 2026
ac57d57
Clarify header/footer fallback comment
JackByrne May 18, 2026
a0564e1
Merge pull request #1 from start-software/performance-optimizations
JackByrne May 18, 2026
10079b1
Merge pull request #637 from start-software/develop
JackByrne May 18, 2026
8d48612
Prebuild and cache inline image XML
JackByrne May 18, 2026
ddf1687
Optimize image part deduplication
JackByrne May 18, 2026
4a96bc4
Use descriptor cache for image deduplication
JackByrne May 18, 2026
98d8aba
Cache image metadata instead of XML
JackByrne May 18, 2026
e488653
Handle non-hashable descriptors; escape quotes
JackByrne May 18, 2026
7c52c56
Scan image partnames to derive counter
JackByrne May 18, 2026
7581a33
Always use str(partname) for image parts
JackByrne May 18, 2026
82fd69c
Initialize docx_ids_index from existing docPr ids
JackByrne May 18, 2026
ef56632
Normalize None image filename before escaping
JackByrne May 18, 2026
f316ca8
Skip caching unhashable image descriptors
JackByrne May 18, 2026
47ca344
Merge pull request #2 from start-software/image-optimizations
JackByrne May 18, 2026
177822b
Merge pull request #638 from start-software/develop
JackByrne May 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 103 additions & 6 deletions docxtpl/inline_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,62 @@

@author: Eric Lapouyade
"""
from xml.sax.saxutils import escape as xml_escape

from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.oxml import OxmlElement, parse_xml
from docx.oxml.ns import qn
from docx.oxml.shape import CT_Inline
from docx.shared import Emu


def _build_inline_image_xml_template():
    """Build the ``str.format`` template used to emit inline-image XML.

    ``CT_Inline.new_pic_inline()`` is called once with recognizable sentinel
    values, the result is serialized, and every sentinel is swapped for a
    named placeholder. Deriving the template from python-docx itself keeps
    it aligned with whatever XML structure the installed version produces.

    Returns:
        str: XML text containing the placeholders ``{shape_id}``, ``{rId}``,
        ``{filename}``, ``{cx}`` and ``{cy}``. Literal braces present in the
        serialized XML are doubled so ``str.format`` leaves them untouched.
    """
    import uuid

    # GUID strings stand in for the two text values (relationship id and
    # picture filename).
    rid_sentinel = str(uuid.uuid4())
    filename_sentinel = str(uuid.uuid4())

    # Numeric sentinels are 9-digit integers in [100000000, 999999999]:
    # shape_id is xsd:unsignedInt (max 4,294,967,295 / 32-bit) and cx/cy are
    # EMU values typed as xsd:long (64-bit), so this range fits all three.
    #
    # Plain substring replacement is only safe when the sentinels cannot be
    # mistaken for each other, so a candidate is rejected when it repeats an
    # earlier number or when its digits occur inside one of the GUID strings
    # (a GUID can contain a long run of decimal digits).
    numeric_sentinels = []
    while len(numeric_sentinels) < 3:
        candidate = uuid.uuid4().int % (9 * 10**8) + 10**8
        digits = str(candidate)
        if (
            candidate in numeric_sentinels
            or digits in rid_sentinel
            or digits in filename_sentinel
        ):
            continue
        numeric_sentinels.append(candidate)
    shape_id, cx, cy = numeric_sentinels

    xml = CT_Inline.new_pic_inline(
        shape_id,
        rid_sentinel,
        filename_sentinel,
        Emu(cx),
        Emu(cy),
    ).xml

    # Double any literal braces BEFORE inserting placeholders; otherwise
    # str.format() would try to interpret them as replacement fields.
    xml = xml.replace("{", "{{").replace("}", "}}")

    # String sentinels are substituted first so that a numeric replacement
    # can never rewrite part of a GUID that is still waiting to be replaced.
    substitutions = (
        (rid_sentinel, "{rId}"),
        (filename_sentinel, "{filename}"),
        (str(shape_id), "{shape_id}"),
        (str(cx), "{cx}"),
        (str(cy), "{cy}"),
    )
    for sentinel, placeholder in substitutions:
        xml = xml.replace(sentinel, placeholder)

    return xml


# Pre-built XML template for inline images, derived from the installed
# python-docx version. Using str.format() on this template avoids calling
# CT_Inline.new_pic_inline() per image (which does 2x parse_xml() +
# element manipulation + .xml serialization each time).
#
# The template is built once, when this module is imported. It is filled in
# with the keyword arguments shape_id, rId, filename, cx and cy; the filename
# value is inserted verbatim, so it must already be escaped for use inside an
# XML attribute.
_INLINE_IMAGE_XML = _build_inline_image_xml_template()


class InlineImage(object):
Expand Down Expand Up @@ -50,16 +104,59 @@ def _add_hyperlink(self, run, url, part):
return run

def _insert_image(self):
pic = self.tpl.current_rendering_part.new_pic_inline(
self.image_descriptor,
self.width,
self.height,
).xml
part = self.tpl.current_rendering_part
image_descriptor = self.image_descriptor

# Cache the expensive parts (image part lookup, rId, dimensions) per
# (part, descriptor, width, height). The XML string itself is NOT
# cached because each insertion needs a unique shape_id - header/footer
# and footnote parts are not renumbered by fix_docpr_ids().
cache = self.tpl._image_cache
# For hashable descriptors (strings, paths), cache by value.
# For unhashable descriptors (file-like objects), skip caching
# entirely — using id() would risk aliasing after GC.
try:
cache_key = (id(part), image_descriptor, self.width, self.height)
Copy link
Copy Markdown

@augmentcode augmentcode Bot Jun 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cache_key caching is currently gated on hashability, but many file-like descriptors (e.g., io.BytesIO / file handles) are hashable, so they may still get cached despite the comment saying caching is skipped. If the same stream object is reused/mutated between insertions within a render, this can reuse a stale (rId, cx/cy, filename) tuple and insert the wrong image/dimensions.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

hash(cache_key) is not None # trigger TypeError if unhashable
except TypeError:
cache_key = None

if cache_key is not None and cache_key in cache:
rId, cx, cy, filename = cache[cache_key]
else:
# Get or add the image part with O(1) descriptor-based dedup,
# avoiding the O(n) linear scan in python-docx's default path.
image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
rId = part.relate_to(image_part, RT.IMAGE)
cx, cy = image.scaled_dimensions(self.width, self.height)
# Escape for use inside XML attribute (quotes must be escaped).
# image.filename is None for file-like descriptors (BytesIO);
# normalize to empty string to match python-docx's behavior.
filename = xml_escape(image.filename or "", {'"': """})
if cache_key is not None:
cache[cache_key] = (rId, int(cx), int(cy), filename)

# Always assign a fresh shape_id per insertion so that drawing IDs
# are unique in every part (including headers/footers/footnotes
# which are not renumbered by fix_docpr_ids()).
self.tpl.docx_ids_index += 1
shape_id = self.tpl.docx_ids_index

# Generate XML directly as a string using a pre-built template
# rather than calling CT_Inline.new_pic_inline() per image.
pic = _INLINE_IMAGE_XML.format(
cx=int(cx),
cy=int(cy),
shape_id=shape_id,
filename=filename,
rId=rId,
)

if self.anchor:
run = parse_xml(pic)
if run.xpath(".//a:blip"):
hyperlink = self._add_hyperlink(
run, self.anchor, self.tpl.current_rendering_part
run, self.anchor, part
)
pic = hyperlink.xml

Expand Down
Loading