From 5ea23403c93e6d91698be18e4ea4aa2a7728df6a Mon Sep 17 00:00:00 2001
From: Bonggo Pras <bonggoprasetyanto@gmail.com>
Date: Wed, 24 Dec 2025 13:57:23 +0700
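Subject: [PATCH 01/22] perf: parse_xml + body mutation optimization

- Pre-compile the regular expressions used during rendering as
  DocxTemplate class attributes instead of passing pattern strings to
  re.sub()/re.search() on every call
- Reuse module-level Jinja2 environments (one per autoescape setting)
  when the caller does not supply jinja_env
- map_tree(): move the rendered tree's children into the existing
  <w:body> instead of replacing the body element
- fix_tables(): parse with docx.opc.oxml.parse_xml instead of
  etree.fromstring with a recovering parser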
---
 docxtpl/template.py | 265 ++++++++++++++++++++++++++++----------------
 1 file changed, 167 insertions(+), 98 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index f20280a..b90c6fc 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -19,6 +19,41 @@
 from jinja2 import Environment, Template, meta
 from jinja2.exceptions import TemplateError
 
+
+def _create_optimized_env(**kwargs):
+    """Create an optimized Jinja2 environment for better performance.
+    
+    Optimizations applied:
+    - auto_reload=False: Skip checking if template source changed
+    - cache_size=400: Larger template cache for repeated renders
+    - enable_async=False: Disable async support (not needed, adds overhead)
+    """
+    return Environment(
+        auto_reload=False,      # Disable template auto-reload (faster)
+        cache_size=400,         # Increase template cache size
+        enable_async=False,     # Disable async (not needed, reduces overhead)
+        **kwargs
+    )
+
+
+# Module-level cached environments (created once, reused across all instances)
+_CACHED_ENV = None
+_CACHED_ENV_AUTOESCAPE = None
+
+
+def _get_cached_env(autoescape=False):
+    """Get or create a cached Jinja2 environment for performance."""
+    global _CACHED_ENV, _CACHED_ENV_AUTOESCAPE
+    
+    if autoescape:
+        if _CACHED_ENV_AUTOESCAPE is None:
+            _CACHED_ENV_AUTOESCAPE = _create_optimized_env(autoescape=True)
+        return _CACHED_ENV_AUTOESCAPE
+    else:
+        if _CACHED_ENV is None:
+            _CACHED_ENV = _create_optimized_env(autoescape=False)
+        return _CACHED_ENV
+
 try:
     from html import escape  # noqa: F401
 except ImportError:
@@ -43,6 +78,60 @@ class DocxTemplate(object):
         "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
     )
 
+    # Pre-compiled regex patterns for patch_xml() optimization
+    # These are compiled once at class load time, not on every render
+    _RE_JINJA_OPEN = re.compile(
+        r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}#])(<[^>]*>)+(?=\})",
+        re.DOTALL
+    )
+    _RE_JINJA_CONTENT = re.compile(
+        r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*",
+        re.DOTALL
+    )
+    _RE_COLSPAN = re.compile(
+        r"(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?</w:tc>)",
+        re.DOTALL
+    )
+    _RE_CELLBG = re.compile(
+        r"(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)",
+        re.DOTALL
+    )
+    _RE_SPACE_PRESERVE = re.compile(
+        r"<w:t>((?:(?!<w:t>).)*)({{.*?}}|{%.*?%})",
+        re.DOTALL
+    )
+    _RE_SPACE_PRESERVE_R = re.compile(
+        r"({{r\s.*?}}|{%r\s.*?%})",
+        re.DOTALL
+    )
+    _RE_MERGE_PREV = re.compile(r"</w:t>(?:(?!</w:t>).)*?{%-", re.DOTALL)
+    _RE_MERGE_NEXT = re.compile(r"-%}(?:(?!<w:t[ >]|{%|{{).)*?<w:t[^>]*?>", re.DOTALL)
+    _RE_VMERGE = re.compile(
+        r"<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*vm\s*%}.*?</w:tc[ >]",
+        re.DOTALL
+    )
+    _RE_HMERGE = re.compile(
+        r"<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*hm\s*%}.*?</w:tc[ >]",
+        re.DOTALL
+    )
+    _RE_CLEAN_TAGS = re.compile(r"(?<=\{[\{%])(.*?)(?=[\}%]})")
+    _RE_PARAGRAPH_NEWLINE = re.compile(r"<w:p([ >])")
+    _RE_PARAGRAPH_REMOVE_NEWLINE = re.compile(r"\n<w:p([ >])")
+    _RE_STRIPTAGS = re.compile(r"</w:t>.*?(<w:t>|<w:t [^>]*>)", re.DOTALL)
+    _RE_COLSPAN_EMPTY = re.compile(r"<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>", re.DOTALL)
+    _RE_GRIDSPAN = re.compile(r"<w:gridSpan[^/]*/>")
+    _RE_TCPR = re.compile(r"(<w:tcPr[^>]*>)")
+    _RE_SHD = re.compile(r"<w:shd[^/]*/>")
+    _RE_RESOLVE_PARAGRAPH = re.compile(r"<w:p(?: [^>]*)?>.*?</w:p>", re.DOTALL)
+    _RE_RESOLVE_RUN = re.compile(r"<w:r(?: [^>]*)?>.*?</w:r>", re.DOTALL)
+    _RE_RESOLVE_TEXT = re.compile(r"<w:t(?: [^>]*)?>.*?</w:t>", re.DOTALL)
+    _RE_RUN_PROPS = re.compile(r"<w:rPr>.*?</w:rPr>")
+    _RE_PARA_PROPS = re.compile(r"<w:pPr>.*?</w:pPr>")
+
+    # Cached Jinja2 environment for performance (created once, reused)
+    _cached_jinja_env = None
+    _cached_jinja_env_autoescape = None  # For autoescape=True variant
+
     def __init__(self, template_file: Union[IO[bytes], str, PathLike]) -> None:
         self.template_file = template_file
         self.reset_replacements()
@@ -88,94 +177,63 @@ def patch_xml(self, src_xml):
         unescape html entities, etc..."""
 
         # replace {<something>{ by {{   ( works with {{ }} {% and %} {# and #})
-        src_xml = re.sub(
-            r"(?<={)(<[^>]*>)+(?=[\{%\#])|(?<=[%\}\#])(<[^>]*>)+(?=\})",
-            "",
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_JINJA_OPEN.sub("", src_xml)
 
         # replace {{<some tags>jinja2 stuff<some other tags>}} by {{jinja2 stuff}}
         # same thing with {% ... %} and {# #}
         # "jinja2 stuff" could a variable, a 'if' etc... anything jinja2 will understand
         def striptags(m):
-            return re.sub(
-                "</w:t>.*?(<w:t>|<w:t [^>]*>)", "", m.group(0), flags=re.DOTALL
-            )
+            # OPTIMIZED: Using pre-compiled pattern
+            return self._RE_STRIPTAGS.sub("", m.group(0))
 
-        src_xml = re.sub(
-            r"{%(?:(?!%}).)*|{#(?:(?!#}).)*|{{(?:(?!}}).)*",
-            striptags,
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_JINJA_CONTENT.sub(striptags, src_xml)
 
         # manage table cell colspan
         def colspan(m):
             cell_xml = m.group(1) + m.group(3)
-            cell_xml = re.sub(
-                r"<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>",
-                "",
-                cell_xml,
-                flags=re.DOTALL,
-            )
-            cell_xml = re.sub(r"<w:gridSpan[^/]*/>", "", cell_xml, count=1)
-            return re.sub(
-                r"(<w:tcPr[^>]*>)",
+            # OPTIMIZED: Using pre-compiled pattern
+            cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml)
+            cell_xml = self._RE_GRIDSPAN.sub("", cell_xml, count=1)
+            return self._RE_TCPR.sub(
                 r'\1<w:gridSpan w:val="{{%s}}"/>' % m.group(2),
                 cell_xml,
             )
 
-        src_xml = re.sub(
-            r"(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*colspan\s+([^%]*)\s*%}(.*?</w:tc>)",
-            colspan,
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_COLSPAN.sub(colspan, src_xml)
 
         # manage table cell background color
         def cellbg(m):
             cell_xml = m.group(1) + m.group(3)
-            cell_xml = re.sub(
-                r"<w:r[ >](?:(?!<w:r[ >]).)*<w:t></w:t>.*?</w:r>",
-                "",
-                cell_xml,
-                flags=re.DOTALL,
-            )
-            cell_xml = re.sub(r"<w:shd[^/]*/>", "", cell_xml, count=1)
-            return re.sub(
-                r"(<w:tcPr[^>]*>)",
+            # OPTIMIZED: Using pre-compiled pattern
+            cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml)
+            cell_xml = self._RE_SHD.sub("", cell_xml, count=1)
+            return self._RE_TCPR.sub(
                 r'\1<w:shd w:val="clear" w:color="auto" w:fill="{{%s}}"/>' % m.group(2),
                 cell_xml,
             )
 
-        src_xml = re.sub(
-            r"(<w:tc[ >](?:(?!<w:tc[ >]).)*){%\s*cellbg\s+([^%]*)\s*%}(.*?</w:tc>)",
-            cellbg,
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_CELLBG.sub(cellbg, src_xml)
 
         # ensure space preservation
-        src_xml = re.sub(
-            r"<w:t>((?:(?!<w:t>).)*)({{.*?}}|{%.*?%})",
+        # OPTIMIZED: Using pre-compiled patterns
+        src_xml = self._RE_SPACE_PRESERVE.sub(
             r'<w:t xml:space="preserve">\1\2',
             src_xml,
-            flags=re.DOTALL,
         )
-        src_xml = re.sub(
-            r"({{r\s.*?}}|{%r\s.*?%})",
+        src_xml = self._RE_SPACE_PRESERVE_R.sub(
             r'</w:t></w:r><w:r><w:t xml:space="preserve">\1</w:t></w:r><w:r><w:t xml:space="preserve">',
             src_xml,
-            flags=re.DOTALL,
         )
 
         # {%- will merge with previous paragraph text
-        src_xml = re.sub(r"</w:t>(?:(?!</w:t>).)*?{%-", "{%", src_xml, flags=re.DOTALL)
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_MERGE_PREV.sub("{%", src_xml)
         # -%} will merge with next paragraph text
-        src_xml = re.sub(
-            r"-%}(?:(?!<w:t[ >]|{%|{{).)*?<w:t[^>]*?>", "%}", src_xml, flags=re.DOTALL
-        )
+        src_xml = self._RE_MERGE_NEXT.sub("%}", src_xml)
 
         for y in ["tr", "tc", "p", "r"]:
             # replace into xml code the row/paragraph/run containing
@@ -220,12 +278,8 @@ def v_merge(m1):
                 flags=re.DOTALL,
             )
 
-        src_xml = re.sub(
-            r"<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*vm\s*%}.*?</w:tc[ >]",
-            v_merge_tc,
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_VMERGE.sub(v_merge_tc, src_xml)
 
         # Use ``{% hm %}`` to make table cell become horizontally merged within
         # a ``{% for %}``.
@@ -279,12 +333,8 @@ def without_gridspan(m2):
             # Discard every other cell generated in loop.
             return "{% if loop.first %}" + xml + "{% endif %}"
 
-        src_xml = re.sub(
-            r"<w:tc[ >](?:(?!<w:tc[ >]).)*?{%\s*hm\s*%}.*?</w:tc[ >]",
-            h_merge_tc,
-            src_xml,
-            flags=re.DOTALL,
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_HMERGE.sub(h_merge_tc, src_xml)
 
         def clean_tags(m):
             return (
@@ -298,18 +348,20 @@ def clean_tags(m):
                 .replace("’", "'")
             )
 
-        src_xml = re.sub(r"(?<=\{[\{%])(.*?)(?=[\}%]})", clean_tags, src_xml)
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_CLEAN_TAGS.sub(clean_tags, src_xml)
 
         return src_xml
 
     def render_xml_part(self, src_xml, part, context, jinja_env=None):
-        src_xml = re.sub(r"<w:p([ >])", r"\n<w:p\1", src_xml)
+        # OPTIMIZED: Using pre-compiled pattern
+        src_xml = self._RE_PARAGRAPH_NEWLINE.sub(r"\n<w:p\1", src_xml)
         try:
             self.current_rendering_part = part
-            if jinja_env:
-                template = jinja_env.from_string(src_xml)
-            else:
-                template = Template(src_xml)
+            # OPTIMIZED: Use cached environment (reuse instead of creating new)
+            if not jinja_env:
+                jinja_env = _get_cached_env()
+            template = jinja_env.from_string(src_xml)
             dst_xml = template.render(context)
         except TemplateError as exc:
             if hasattr(exc, "lineno") and exc.lineno is not None:
@@ -320,7 +372,8 @@ def render_xml_part(self, src_xml, part, context, jinja_env=None):
                 )
 
             raise exc
-        dst_xml = re.sub(r"\n<w:p([ >])", r"<w:p\1", dst_xml)
+        # OPTIMIZED: Using pre-compiled pattern
+        dst_xml = self._RE_PARAGRAPH_REMOVE_NEWLINE.sub(r"<w:p\1", dst_xml)
         dst_xml = (
             dst_xml.replace("{_{", "{{")
             .replace("}_}", "}}")
@@ -348,8 +401,9 @@ def render_properties(
             "title",
             # 'version',
         ]
+        # OPTIMIZED: Use cached environment
         if jinja_env is None:
-            jinja_env = Environment()
+            jinja_env = _get_cached_env()
 
         for prop in properties:
             initial = getattr(self.docx.core_properties, prop)
@@ -360,8 +414,9 @@ def render_properties(
     def render_footnotes(
         self, context: Dict[str, Any], jinja_env: Optional[Environment] = None
     ) -> None:
+        # OPTIMIZED: Use cached environment
         if jinja_env is None:
-            jinja_env = Environment()
+            jinja_env = _get_cached_env()
 
         for section in self.docx.sections:
             for part in section.part.package.parts:
@@ -403,30 +458,29 @@ def resolve_text(run_properties, paragraph_properties, m):
             return xml
 
         def resolve_run(paragraph_properties, m):
-            run_properties = re.search(r"<w:rPr>.*?</w:rPr>", m.group(0))
+            # OPTIMIZED: Using pre-compiled pattern
+            run_properties = self._RE_RUN_PROPS.search(m.group(0))
             run_properties = run_properties.group(0) if run_properties else ""
-            return re.sub(
-                r"<w:t(?: [^>]*)?>.*?</w:t>",
+            # OPTIMIZED: Using pre-compiled pattern
+            return self._RE_RESOLVE_TEXT.sub(
                 lambda x: resolve_text(run_properties, paragraph_properties, x),
                 m.group(0),
-                flags=re.DOTALL,
             )
 
         def resolve_paragraph(m):
-            paragraph_properties = re.search(r"<w:pPr>.*?</w:pPr>", m.group(0))
+            # OPTIMIZED: Using pre-compiled pattern
+            paragraph_properties = self._RE_PARA_PROPS.search(m.group(0))
             paragraph_properties = (
                 paragraph_properties.group(0) if paragraph_properties else ""
             )
-            return re.sub(
-                r"<w:r(?: [^>]*)?>.*?</w:r>",
+            # OPTIMIZED: Using pre-compiled pattern
+            return self._RE_RESOLVE_RUN.sub(
                 lambda x: resolve_run(paragraph_properties, x),
                 m.group(0),
-                flags=re.DOTALL,
             )
 
-        xml = re.sub(
-            r"<w:p(?: [^>]*)?>.*?</w:p>", resolve_paragraph, xml, flags=re.DOTALL
-        )
+        # OPTIMIZED: Using pre-compiled pattern
+        xml = self._RE_RESOLVE_PARAGRAPH.sub(resolve_paragraph, xml)
 
         return xml
 
@@ -437,9 +491,21 @@ def build_xml(self, context, jinja_env=None):
         return xml
 
     def map_tree(self, tree):
-        root = self.docx._element
-        body = root.body
-        root.replace(body, tree)
+        """Replace body content with rendered tree.
+        
+        OPTIMIZED: Instead of replacing the entire <w:body> element (which
+        triggers expensive reconciliation), we now mutate the body's children
+        directly. This is much cheaper for large trees.
+        """
+        body = self.docx._element.body
+        
+        # Remove all existing children from body
+        for child in list(body):
+            body.remove(child)
+        
+        # Append all children from the new tree
+        for child in list(tree):
+            body.append(child)
 
     def get_headers_footers(self, uri):
         for relKey, val in self.docx._part.rels.items():
@@ -479,11 +545,11 @@ def render(
         # init template working attributes
         self.render_init()
 
-        if autoescape:
-            if not jinja_env:
-                jinja_env = Environment(autoescape=autoescape)
-            else:
-                jinja_env.autoescape = autoescape
+        # OPTIMIZED: Use cached environment by default (avoids overhead of creating new env)
+        if not jinja_env:
+            jinja_env = _get_cached_env(autoescape=autoescape)
+        elif autoescape:
+            jinja_env.autoescape = autoescape
 
         # Body
         xml_src = self.build_xml(context, jinja_env)
@@ -517,8 +583,10 @@ def render(
     # using of TC tag in for cycle can cause that count of columns does not
     # correspond to real count of columns in row. This function is able to fix it.
     def fix_tables(self, xml):
-        parser = etree.XMLParser(recover=True)
-        tree = etree.fromstring(xml, parser=parser)
+        # OPTIMIZED: Use parse_xml from docx.opc.oxml instead of etree.fromstring
+        # This ensures same document model and element classes, minimizing
+        # reconciliation cost when the tree is later used with map_tree()
+        tree = parse_xml(xml)
         # get namespace
         ns = "{" + tree.nsmap["w"] + "}"
         # walk trough xml and find table
@@ -913,7 +981,8 @@ def get_undeclared_template_variables(
         if jinja_env:
             env = jinja_env
         else:
-            env = Environment()
+            # OPTIMIZED: Use cached environment
+            env = _get_cached_env()
 
         parse_content = env.parse(xml)
         all_variables = meta.find_undeclared_variables(parse_content)

From 2dd1a2955e674cdc67e922aeb8bc8257c0c7ff8c Mon Sep 17 00:00:00 2001
From: Bonggo Pras <bonggoprasetyanto@gmail.com>
Date: Thu, 8 Jan 2026 17:18:39 +0700
Subject: [PATCH 02/22] Fix poetry configuration - add required fields

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 476fc0e..28533c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,10 @@ repository = "https://github.com/elapouya/python-docx-template.git"
 document = "https://docxtpl.readthedocs.org"
 
 [tool.poetry]
+name = "docxtpl"
 version = "0.0.0"
+description = "Python docx template engine"
+authors = ["Eric Lapouyade <elapouya@proton.me>"]
 
 [tool.poetry.requires-plugins]
 poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] }

From ec0b7e1e3858aa73be11ecdc30ef38f817b53589 Mon Sep 17 00:00:00 2001
From: Bonggo Pras <bonggoprasetyanto@gmail.com>
Date: Thu, 8 Jan 2026 20:27:21 +0700
Subject: [PATCH 03/22] fix: improve XML handling and cleanup code

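- Add try/except fallback with recover=True for malformed XML in fix_tables()
- Use OxmlElement with qn() instead of etree.SubElement for new grid columns
- Remove unused _cached_jinja_env variables
- Clean up redundant comments
- Skip patching/rendering of header and footer parts without Jinja tags
- fix_tables(): collect raw and gridSpan-aware cell counts in a single
  pass over the rows, and skip tables that have no <w:tblGrid>
- fix_docpr_ids(): iterate wp:docPr elements with tree.iter() instead of
  an XPath query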
---
 docxtpl/template.py | 129 +++++++++++++++++++++-----------------------
 1 file changed, 62 insertions(+), 67 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index b90c6fc..272a5fb 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -15,6 +15,8 @@
 from docx.opc.oxml import parse_xml
 from docx.opc.part import XmlPart
 import docx.oxml.ns
+from docx.oxml import OxmlElement
+from docx.oxml.ns import qn
 from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
 from jinja2 import Environment, Template, meta
 from jinja2.exceptions import TemplateError
@@ -128,10 +130,6 @@ class DocxTemplate(object):
     _RE_RUN_PROPS = re.compile(r"<w:rPr>.*?</w:rPr>")
     _RE_PARA_PROPS = re.compile(r"<w:pPr>.*?</w:pPr>")
 
-    # Cached Jinja2 environment for performance (created once, reused)
-    _cached_jinja_env = None
-    _cached_jinja_env_autoescape = None  # For autoescape=True variant
-
     def __init__(self, template_file: Union[IO[bytes], str, PathLike]) -> None:
         self.template_file = template_file
         self.reset_replacements()
@@ -177,23 +175,19 @@ def patch_xml(self, src_xml):
         unescape html entities, etc..."""
 
         # replace {<something>{ by {{   ( works with {{ }} {% and %} {# and #})
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_JINJA_OPEN.sub("", src_xml)
 
         # replace {{<some tags>jinja2 stuff<some other tags>}} by {{jinja2 stuff}}
         # same thing with {% ... %} and {# #}
         # "jinja2 stuff" could a variable, a 'if' etc... anything jinja2 will understand
         def striptags(m):
-            # OPTIMIZED: Using pre-compiled pattern
             return self._RE_STRIPTAGS.sub("", m.group(0))
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_JINJA_CONTENT.sub(striptags, src_xml)
 
         # manage table cell colspan
         def colspan(m):
             cell_xml = m.group(1) + m.group(3)
-            # OPTIMIZED: Using pre-compiled pattern
             cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml)
             cell_xml = self._RE_GRIDSPAN.sub("", cell_xml, count=1)
             return self._RE_TCPR.sub(
@@ -201,13 +195,11 @@ def colspan(m):
                 cell_xml,
             )
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_COLSPAN.sub(colspan, src_xml)
 
         # manage table cell background color
         def cellbg(m):
             cell_xml = m.group(1) + m.group(3)
-            # OPTIMIZED: Using pre-compiled pattern
             cell_xml = self._RE_COLSPAN_EMPTY.sub("", cell_xml)
             cell_xml = self._RE_SHD.sub("", cell_xml, count=1)
             return self._RE_TCPR.sub(
@@ -215,11 +207,9 @@ def cellbg(m):
                 cell_xml,
             )
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_CELLBG.sub(cellbg, src_xml)
 
         # ensure space preservation
-        # OPTIMIZED: Using pre-compiled patterns
         src_xml = self._RE_SPACE_PRESERVE.sub(
             r'<w:t xml:space="preserve">\1\2',
             src_xml,
@@ -230,7 +220,6 @@ def cellbg(m):
         )
 
         # {%- will merge with previous paragraph text
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_MERGE_PREV.sub("{%", src_xml)
         # -%} will merge with next paragraph text
         src_xml = self._RE_MERGE_NEXT.sub("%}", src_xml)
@@ -278,7 +267,6 @@ def v_merge(m1):
                 flags=re.DOTALL,
             )
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_VMERGE.sub(v_merge_tc, src_xml)
 
         # Use ``{% hm %}`` to make table cell become horizontally merged within
@@ -333,7 +321,6 @@ def without_gridspan(m2):
             # Discard every other cell generated in loop.
             return "{% if loop.first %}" + xml + "{% endif %}"
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_HMERGE.sub(h_merge_tc, src_xml)
 
         def clean_tags(m):
@@ -348,17 +335,14 @@ def clean_tags(m):
                 .replace("’", "'")
             )
 
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_CLEAN_TAGS.sub(clean_tags, src_xml)
 
         return src_xml
 
     def render_xml_part(self, src_xml, part, context, jinja_env=None):
-        # OPTIMIZED: Using pre-compiled pattern
         src_xml = self._RE_PARAGRAPH_NEWLINE.sub(r"\n<w:p\1", src_xml)
         try:
             self.current_rendering_part = part
-            # OPTIMIZED: Use cached environment (reuse instead of creating new)
             if not jinja_env:
                 jinja_env = _get_cached_env()
             template = jinja_env.from_string(src_xml)
@@ -372,7 +356,6 @@ def render_xml_part(self, src_xml, part, context, jinja_env=None):
                 )
 
             raise exc
-        # OPTIMIZED: Using pre-compiled pattern
         dst_xml = self._RE_PARAGRAPH_REMOVE_NEWLINE.sub(r"<w:p\1", dst_xml)
         dst_xml = (
             dst_xml.replace("{_{", "{{")
@@ -401,7 +384,6 @@ def render_properties(
             "title",
             # 'version',
         ]
-        # OPTIMIZED: Use cached environment
         if jinja_env is None:
             jinja_env = _get_cached_env()
 
@@ -414,7 +396,6 @@ def render_properties(
     def render_footnotes(
         self, context: Dict[str, Any], jinja_env: Optional[Environment] = None
     ) -> None:
-        # OPTIMIZED: Use cached environment
         if jinja_env is None:
             jinja_env = _get_cached_env()
 
@@ -458,28 +439,23 @@ def resolve_text(run_properties, paragraph_properties, m):
             return xml
 
         def resolve_run(paragraph_properties, m):
-            # OPTIMIZED: Using pre-compiled pattern
             run_properties = self._RE_RUN_PROPS.search(m.group(0))
             run_properties = run_properties.group(0) if run_properties else ""
-            # OPTIMIZED: Using pre-compiled pattern
             return self._RE_RESOLVE_TEXT.sub(
                 lambda x: resolve_text(run_properties, paragraph_properties, x),
                 m.group(0),
             )
 
         def resolve_paragraph(m):
-            # OPTIMIZED: Using pre-compiled pattern
             paragraph_properties = self._RE_PARA_PROPS.search(m.group(0))
             paragraph_properties = (
                 paragraph_properties.group(0) if paragraph_properties else ""
             )
-            # OPTIMIZED: Using pre-compiled pattern
             return self._RE_RESOLVE_RUN.sub(
                 lambda x: resolve_run(paragraph_properties, x),
                 m.group(0),
             )
 
-        # OPTIMIZED: Using pre-compiled pattern
         xml = self._RE_RESOLVE_PARAGRAPH.sub(resolve_paragraph, xml)
 
         return xml
@@ -524,10 +500,17 @@ def get_headers_footers_encoding(self, xml):
     def build_headers_footers_xml(self, context, uri, jinja_env=None):
         for relKey, part in self.get_headers_footers(uri):
             xml = self.get_part_xml(part)
-            encoding = self.get_headers_footers_encoding(xml)
-            xml = self.patch_xml(xml)
-            xml = self.render_xml_part(xml, part, context, jinja_env)
-            yield relKey, xml.encode(encoding)
+            
+            # Skip rendering if no Jinja tags present
+            # Headers/footers are often static, so this avoids caching/parsing overhead
+            if self._RE_JINJA_OPEN.search(xml) or self._RE_JINJA_CONTENT.search(xml):
+                encoding = self.get_headers_footers_encoding(xml)
+                xml = self.patch_xml(xml)
+                xml = self.render_xml_part(xml, part, context, jinja_env)
+                yield relKey, xml.encode(encoding)
+            else:
+                encoding = self.get_headers_footers_encoding(xml)
+                yield relKey, xml.encode(encoding)
 
     def map_headers_footers_xml(self, relKey, xml):
         part = self.docx._part.rels[relKey].target_part
@@ -545,7 +528,7 @@ def render(
         # init template working attributes
         self.render_init()
 
-        # OPTIMIZED: Use cached environment by default (avoids overhead of creating new env)
+        # Use cached environment by default
         if not jinja_env:
             jinja_env = _get_cached_env(autoescape=autoescape)
         elif autoescape:
@@ -581,24 +564,53 @@ def render(
         self.is_rendered = True
 
     # using of TC tag in for cycle can cause that count of columns does not
-    # correspond to real count of columns in row. This function is able to fix it.
+    # correspond to real count of columns in row.
     def fix_tables(self, xml):
-        # OPTIMIZED: Use parse_xml from docx.opc.oxml instead of etree.fromstring
-        # This ensures same document model and element classes, minimizing
-        # reconciliation cost when the tree is later used with map_tree()
-        tree = parse_xml(xml)
+        # Use parse_xml with safe fallback for malformed XML
+        try:
+            tree = parse_xml(xml)
+        except Exception:
+            # Fallback to permissive parser for malformed XML
+            parser = etree.XMLParser(recover=True)
+            tree = etree.fromstring(xml, parser=parser)
         # get namespace
         ns = "{" + tree.nsmap["w"] + "}"
         # walk trough xml and find table
         for t in tree.iter(ns + "tbl"):
             tblGrid = t.find(ns + "tblGrid")
+            if tblGrid is None:
+                continue
+                
             columns = tblGrid.findall(ns + "gridCol")
-            to_add = 0
-            # walk trough all rows and try to find if there is higher cell count
+            columns_len = len(columns)
+            
+            # Single pass row analysis with both counters
+            # Original logic uses raw count for ADD, effective count for REMOVE
+            max_raw_cells = 0       # For ADD decision (raw tc count)
+            max_effective_cells = 0  # For REMOVE decision (with gridSpan)
+            
             for r in t.iter(ns + "tr"):
                 cells = r.findall(ns + "tc")
-                if (len(columns) + to_add) < len(cells):
-                    to_add = len(cells) - len(columns)
+                raw_count = len(cells)
+                effective_count = 0
+                
+                for cell in cells:
+                    tc_pr = cell.find(ns + "tcPr")
+                    if tc_pr is not None:
+                        grid_span = tc_pr.find(ns + "gridSpan")
+                        if grid_span is not None:
+                            effective_count += int(grid_span.get(ns + "val"))
+                            continue
+                    effective_count += 1
+                
+                if raw_count > max_raw_cells:
+                    max_raw_cells = raw_count
+                if effective_count > max_effective_cells:
+                    max_effective_cells = effective_count
+            
+            # ADD columns based on RAW cell count (original behavior)
+            to_add = max_raw_cells - columns_len if max_raw_cells > columns_len else 0
+            
             # is necessary to add columns?
             if to_add > 0:
                 # at first, calculate width of table according to columns
@@ -620,34 +632,16 @@ def fix_tables(self, xml):
                                 int(float(c.get(ns + "w")) * new_average / old_average)
                             ),
                         )
-                    # add new columns
+                    # add new columns using OxmlElement for proper python-docx compatibility
                     for i in range(to_add):
-                        etree.SubElement(
-                            tblGrid, ns + "gridCol", {ns + "w": str(int(new_average))}
-                        )
+                        new_col = OxmlElement('w:gridCol')
+                        new_col.set(qn('w:w'), str(int(new_average)))
+                        tblGrid.append(new_col)
 
-            # Refetch columns after columns addition.
+            # REMOVE columns based on EFFECTIVE cell count (original behavior)
             columns = tblGrid.findall(ns + "gridCol")
             columns_len = len(columns)
-
-            cells_len_max = 0
-
-            def get_cell_len(total, cell):
-                tc_pr = cell.find(ns + "tcPr")
-                grid_span = None if tc_pr is None else tc_pr.find(ns + "gridSpan")
-
-                if grid_span is not None:
-                    return total + int(grid_span.get(ns + "val"))
-
-                return total + 1
-
-            # Calculate max of table cells to compare with `gridCol`.
-            for r in t.iter(ns + "tr"):
-                cells = r.findall(ns + "tc")
-                cells_len = functools.reduce(get_cell_len, cells, 0)
-                cells_len_max = max(cells_len_max, cells_len)
-
-            to_remove = columns_len - cells_len_max
+            to_remove = columns_len - max_effective_cells if columns_len > max_effective_cells else 0
 
             # If after the loop, there're less columns, than
             # originally was, remove extra `gridCol` declarations.
@@ -676,8 +670,10 @@ def get_cell_len(total, cell):
         return tree
 
     def fix_docpr_ids(self, tree):
-        # some Ids may have some collisions : so renumbering all of them :
-        for elt in tree.xpath("//wp:docPr", namespaces=docx.oxml.ns.nsmap):
+        wp_ns = docx.oxml.ns.nsmap['wp']
+        tag = "{%s}docPr" % wp_ns
+        
+        for elt in tree.iter(tag):
             self.docx_ids_index += 1
             elt.attrib["id"] = str(self.docx_ids_index)
 
@@ -981,7 +977,6 @@ def get_undeclared_template_variables(
         if jinja_env:
             env = jinja_env
         else:
-            # OPTIMIZED: Use cached environment
             env = _get_cached_env()
 
         parse_content = env.parse(xml)

From e455da743210f75de24640cb4d93582d48b52b07 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Fri, 9 Jan 2026 15:35:29 +0000
Subject: [PATCH 04/22] Small comment clean-up

---
 docxtpl/template.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 272a5fb..a757037 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -469,7 +469,7 @@ def build_xml(self, context, jinja_env=None):
     def map_tree(self, tree):
         """Replace body content with rendered tree.
         
-        OPTIMIZED: Instead of replacing the entire <w:body> element (which
+        Instead of replacing the entire <w:body> element with replace() (which
         triggers expensive reconciliation), we now mutate the body's children
         directly. This is much cheaper for large trees.
         """
@@ -568,9 +568,9 @@ def render(
     def fix_tables(self, xml):
         # Use parse_xml with safe fallback for malformed XML
         try:
-            tree = parse_xml(xml)
+            tree = parse_xml(xml) # parse_xml() is significantly faster
         except Exception:
-            # Fallback to permissive parser for malformed XML
+            # Fallback to permissive parser in the event of malformed XML
             parser = etree.XMLParser(recover=True)
             tree = etree.fromstring(xml, parser=parser)
         # get namespace
@@ -585,7 +585,6 @@ def fix_tables(self, xml):
             columns_len = len(columns)
             
             # Single pass row analysis with both counters
-            # Original logic uses raw count for ADD, effective count for REMOVE
             max_raw_cells = 0       # For ADD decision (raw tc count)
             max_effective_cells = 0  # For REMOVE decision (with gridSpan)
             
@@ -670,6 +669,7 @@ def fix_tables(self, xml):
         return tree
 
     def fix_docpr_ids(self, tree):
+        # Some Ids may have some collisions : so renumbering all of them
         wp_ns = docx.oxml.ns.nsmap['wp']
         tag = "{%s}docPr" % wp_ns
         

From e0fb809457cc410ca4aba52aca9c73cc4508645c Mon Sep 17 00:00:00 2001
From: bonggo-pras <userpras@gmail.com>
Date: Tue, 12 May 2026 16:15:10 +0700
Subject: [PATCH 05/22] perf: optimize body replacement and header/footer
 processing in DocxTemplate

---
 docxtpl/template.py | 97 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 23 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index a757037..e0b2036 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -18,9 +18,12 @@
 from docx.oxml import OxmlElement
 from docx.oxml.ns import qn
 from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
+import logging
 from jinja2 import Environment, Template, meta
 from jinja2.exceptions import TemplateError
 
+logger = logging.getLogger(__name__)
+
 
 def _create_optimized_env(**kwargs):
     """Create an optimized Jinja2 environment for better performance.
@@ -130,6 +133,10 @@ class DocxTemplate(object):
     _RE_RUN_PROPS = re.compile(r"<w:rPr>.*?</w:rPr>")
     _RE_PARA_PROPS = re.compile(r"<w:pPr>.*?</w:pPr>")
 
+    # Precompiled pattern for fast detection of any Jinja syntax in a string.
+    # Used in render() to skip header/footer processing when no tags are present.
+    _JINJA_PATTERN = re.compile(r'\{\{|\{%|\{#')
+
     def __init__(self, template_file: Union[IO[bytes], str, PathLike]) -> None:
         self.template_file = template_file
         self.reset_replacements()
@@ -467,21 +474,54 @@ def build_xml(self, context, jinja_env=None):
         return xml
 
     def map_tree(self, tree):
-        """Replace body content with rendered tree.
-        
-        Instead of replacing the entire <w:body> element with replace() (which
-        triggers expensive reconciliation), we now mutate the body's children
-        directly. This is much cheaper for large trees.
+        """Replace the body element with the rendered tree.
+
+        Uses root.remove() + root.insert(index) instead of root.replace() to
+        avoid lxml's O(n) recursive cleanup on large XML trees.  The body
+        index is located first so document element order (body before sectPr)
+        is preserved.
+
+        SAFETY: If the body is not a direct child of root (malformed template)
+        or if remove/insert raises for any reason, we fall back to copying
+        children so rendering is never broken by this optimisation.
         """
-        body = self.docx._element.body
-        
-        # Remove all existing children from body
-        for child in list(body):
-            body.remove(child)
-        
-        # Append all children from the new tree
-        for child in list(tree):
-            body.append(child)
+        root = self.docx._element
+        old_body = root.body
+
+        # Locate the body's position among root's direct children.
+        body_index = None
+        for i, child in enumerate(root):
+            if child is old_body:
+                body_index = i
+                break
+
+        if body_index is None:
+            # Malformed template – body is not a direct child; fall back.
+            logger.warning(
+                "map_tree: body is not a direct child of root (malformed template?). "
+                "Falling back to child-copy implementation."
+            )
+            for child in list(old_body):
+                old_body.remove(child)
+            for child in list(tree):
+                old_body.append(child)
+            return
+
+        try:
+            root.remove(old_body)
+            root.insert(body_index, tree)
+        except Exception:
+            logger.warning(
+                "map_tree: optimized remove/insert failed; falling back to child-copy.",
+                exc_info=True,
+            )
+            # Re-attach old_body if it was already removed before the failure.
+            if old_body.getparent() is None:
+                root.insert(body_index, old_body)
+            for child in list(old_body):
+                old_body.remove(child)
+            for child in list(tree):
+                old_body.append(child)
 
     def get_headers_footers(self, uri):
         for relKey, val in self.docx._part.rels.items():
@@ -546,15 +586,26 @@ def render(
         # Replace body xml tree
         self.map_tree(tree)
 
-        # Headers
-        headers = self.build_headers_footers_xml(context, self.HEADER_URI, jinja_env)
-        for relKey, xml in headers:
-            self.map_headers_footers_xml(relKey, xml)
-
-        # Footers
-        footers = self.build_headers_footers_xml(context, self.FOOTER_URI, jinja_env)
-        for relKey, xml in footers:
-            self.map_headers_footers_xml(relKey, xml)
+        # Headers & Footers – skip entirely when no Jinja tags are present to
+        # avoid unnecessary XML parsing, patch_xml, and part replacement.
+        for uri in (self.HEADER_URI, self.FOOTER_URI):
+            try:
+                has_jinja = any(
+                    self._JINJA_PATTERN.search(self.get_part_xml(part))
+                    for _relKey, part in self.get_headers_footers(uri)
+                )
+                if has_jinja:
+                    for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
+                        self.map_headers_footers_xml(relKey, xml)
+            except Exception:
+                logger.warning(
+                    "render: header/footer Jinja-tag check failed for %s; "
+                    "falling back to full processing.",
+                    uri,
+                    exc_info=True,
+                )
+                for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
+                    self.map_headers_footers_xml(relKey, xml)
 
         self.render_properties(context, jinja_env)
 

From c82d2a449bdb37c3c22792b5aa2e21c4037aa616 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 11:21:13 +0100
Subject: [PATCH 06/22] Remove logging warnings in template.py

Delete the module-level logger and several logger.warning calls in docxtpl/template.py. They were added while debugging and are not meant to ship. The now-unused `import logging` statement is left in place by this patch.
---
 docxtpl/template.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index e0b2036..e4ba92c 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -22,8 +22,6 @@
 from jinja2 import Environment, Template, meta
 from jinja2.exceptions import TemplateError
 
-logger = logging.getLogger(__name__)
-
 
 def _create_optimized_env(**kwargs):
     """Create an optimized Jinja2 environment for better performance.
@@ -497,10 +495,6 @@ def map_tree(self, tree):
 
         if body_index is None:
             # Malformed template – body is not a direct child; fall back.
-            logger.warning(
-                "map_tree: body is not a direct child of root (malformed template?). "
-                "Falling back to child-copy implementation."
-            )
             for child in list(old_body):
                 old_body.remove(child)
             for child in list(tree):
@@ -511,10 +505,6 @@ def map_tree(self, tree):
             root.remove(old_body)
             root.insert(body_index, tree)
         except Exception:
-            logger.warning(
-                "map_tree: optimized remove/insert failed; falling back to child-copy.",
-                exc_info=True,
-            )
             # Re-attach old_body if it was already removed before the failure.
             if old_body.getparent() is None:
                 root.insert(body_index, old_body)
@@ -598,12 +588,6 @@ def render(
                     for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                         self.map_headers_footers_xml(relKey, xml)
             except Exception:
-                logger.warning(
-                    "render: header/footer Jinja-tag check failed for %s; "
-                    "falling back to full processing.",
-                    uri,
-                    exc_info=True,
-                )
                 for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                     self.map_headers_footers_xml(relKey, xml)
 

From efd473b7119034988f7f7168fc23128c25867bed Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 11:34:51 +0100
Subject: [PATCH 07/22] Clarify body-swap docstring and comments

Improve documentation in map_tree to explain the optimization: instead of removing and re-appending every body child one by one (O(n) lxml operations), the code swaps the entire <w:body> element via root.remove() + root.insert(); the swap is effectively O(1) because the document root has only a handful of direct children. Clarify that the body's index is preserved so element order (body before sectPr) remains intact, and spell out the fallback behavior (child-by-child copy) if the body isn't a direct child or if remove/insert fails. Add further safety and explanatory comments. Only comments and the docstring change; there is no functional change.
---
 docxtpl/template.py | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index e4ba92c..a44b18c 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -474,19 +474,24 @@ def build_xml(self, context, jinja_env=None):
     def map_tree(self, tree):
         """Replace the body element with the rendered tree.
 
-        Uses root.remove() + root.insert(index) instead of root.replace() to
-        avoid lxml's O(n) recursive cleanup on large XML trees.  The body
-        index is located first so document element order (body before sectPr)
-        is preserved.
+        Instead of iterating over all body children to remove/re-append them
+        one-by-one (O(n) lxml operations, each with internal bookkeeping),
+        we swap the entire <w:body> element in the document root using
+        root.remove() + root.insert(). This is O(1) since the root element
+        (<w:document>) has only a handful of direct children.
+
+        The body's index is located first so document element order is
+        preserved (e.g. body before sectPr).
 
         SAFETY: If the body is not a direct child of root (malformed template)
-        or if remove/insert raises for any reason, we fall back to copying
-        children so rendering is never broken by this optimisation.
+        or if remove/insert raises for any reason, we fall back to the slower
+        child-by-child copy so rendering is never broken.
         """
         root = self.docx._element
         old_body = root.body
 
-        # Locate the body's position among root's direct children.
+        # Find where <w:body> sits among root's direct children so we can
+        # re-insert the new tree at the same position.
         body_index = None
         for i, child in enumerate(root):
             if child is old_body:
@@ -494,7 +499,8 @@ def map_tree(self, tree):
                 break
 
         if body_index is None:
-            # Malformed template – body is not a direct child; fall back.
+            # Malformed template – body is not a direct child of root.
+            # Fall back to child-by-child replacement on the existing body.
             for child in list(old_body):
                 old_body.remove(child)
             for child in list(tree):
@@ -502,10 +508,15 @@ def map_tree(self, tree):
             return
 
         try:
+            # Detach the old body and insert the new tree (which is itself a
+            # <w:body> element returned by fix_tables/parse_xml) at the same
+            # position. This avoids O(n) per-child remove/append calls.
             root.remove(old_body)
             root.insert(body_index, tree)
         except Exception:
-            # Re-attach old_body if it was already removed before the failure.
+            # If something went wrong, restore the document to a usable state
+            # by re-attaching the old body (if it was already detached) and
+            # falling back to child-by-child copy.
             if old_body.getparent() is None:
                 root.insert(body_index, old_body)
             for child in list(old_body):

From 84c14206946f98c4195023b7507280394ffd07ac Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 11:46:14 +0100
Subject: [PATCH 08/22] Improve header/footer Jinja detection and fallback

Enhance header/footer processing by detecting Jinja tags split across Word XML runs: check both intact tags (_JINJA_PATTERN) and open-tag fragments (_RE_JINJA_OPEN) when scanning part XML. Use a generator to iterate part XML strings once, and keep the existing exception fallback to unconditionally render headers/footers if the fast-path check fails (e.g. malformed XML). Also add comments explaining why properties and footnotes need no equivalent skip-check, and make minor comment style fixes.
---
 docxtpl/template.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index a44b18c..1a0f5b0 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -578,38 +578,49 @@ def render(
         # Body
         xml_src = self.build_xml(context, jinja_env)
 
-        # fix tables if needed
+        # Fix tables if needed
         tree = self.fix_tables(xml_src)
 
-        # fix docPr ID's
+        # Fix docPr ID's
         self.fix_docpr_ids(tree)
 
         # Replace body xml tree
         self.map_tree(tree)
 
-        # Headers & Footers – skip entirely when no Jinja tags are present to
-        # avoid unnecessary XML parsing, patch_xml, and part replacement.
+        # Headers & Footers - skip when no Jinja tags are present.
+        # Uses both _JINJA_PATTERN (intact tags) and _RE_JINJA_OPEN (tags
+        # split across XML runs by Word). Falls back to full render on error.
         for uri in (self.HEADER_URI, self.FOOTER_URI):
             try:
                 has_jinja = any(
-                    self._JINJA_PATTERN.search(self.get_part_xml(part))
-                    for _relKey, part in self.get_headers_footers(uri)
+                    self._JINJA_PATTERN.search(xml)
+                    or self._RE_JINJA_OPEN.search(xml)
+                    for xml in (
+                        self.get_part_xml(part)
+                        for _relKey, part in self.get_headers_footers(uri)
+                    )
                 )
                 if has_jinja:
                     for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                         self.map_headers_footers_xml(relKey, xml)
             except Exception:
+                # Fallback: if the fast-path check raises (e.g. malformed XML
+                # in a part), process all headers/footers unconditionally.
                 for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                     self.map_headers_footers_xml(relKey, xml)
 
+        # Properties: no skip-check needed - these are a handful of short
+        # strings (author, title, etc.) where from_string() is near-zero cost.
         self.render_properties(context, jinja_env)
 
+        # Footnotes: no skip-check needed - at most one part exists in typical
+        # documents, and many have none, so the loop body rarely executes.
         self.render_footnotes(context, jinja_env)
 
         # set rendered flag
         self.is_rendered = True
 
-    # using of TC tag in for cycle can cause that count of columns does not
+    # Using of TC tag in for cycle can cause that count of columns does not
     # correspond to real count of columns in row.
     def fix_tables(self, xml):
         # Use parse_xml with safe fallback for malformed XML

From e5106f3a2caf1ab216e07f0341c57406be026f12 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 12:08:28 +0100
Subject: [PATCH 09/22] Optimize resolve_listing with early exit

Add a fast-path to DocxTemplate.resolve_listing that returns the input XML unchanged when no Listing special characters are present. The check looks for tab, newline, bell and form-feed ("\t", "\n", "\a", "\f") and avoids running the heavier resolution logic in the common case, improving performance without changing behavior.
---
 docxtpl/template.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 1a0f5b0..8dfd6b3 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -419,6 +419,10 @@ def render_footnotes(
                     part._blob = xml.encode("utf-8")
 
     def resolve_listing(self, xml):
+        # Early exit: if no Listing special characters are present (common case),
+        # there's nothing to resolve, skip the work below.
+        if "\t" not in xml and "\n" not in xml and "\a" not in xml and "\f" not in xml:
+            return xml
 
         def resolve_text(run_properties, paragraph_properties, m):
             xml = m.group(0).replace(

From a5c3286d711362184d036934abff7bf47c315e39 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 12:22:36 +0100
Subject: [PATCH 10/22] Precompile tag-stripping regexes in DocxTemplate

Introduce pre-compiled regex patterns (_RE_TAG_STRIP and _RE_COMMENT_STRIP) to strip surrounding <w:y> tags from template tags like {%y ...%}, {{y ...}} and comments {#y ...#}. Replace repeated re.sub loops with iteration over these patterns to avoid recompiling the same regexes on every call, reduce code duplication, and improve performance/maintainability.
---
 docxtpl/template.py | 48 +++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 8dfd6b3..66f963c 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -131,6 +131,26 @@ class DocxTemplate(object):
     _RE_RUN_PROPS = re.compile(r"<w:rPr>.*?</w:rPr>")
     _RE_PARA_PROPS = re.compile(r"<w:pPr>.*?</w:pPr>")
 
+    # Pre-compiled patterns for tag-stripping in patch_xml().
+    # Strips surrounding <w:y> tags from {%y ...%} / {{y ...}} template tags.
+    _RE_TAG_STRIP = tuple(
+        re.compile(
+            r"<w:%s[ >](?:(?!<w:%s[ >]).)*({%%|{{)%s ([^}%%]*(?:%%}|}})).*?</w:%s>"
+            % (y, y, y, y),
+            re.DOTALL,
+        )
+        for y in ("tr", "tc", "p", "r")
+    )
+    # Same for {#y ...#} comment tags (not 'r' - comments in runs are uncommon).
+    _RE_COMMENT_STRIP = tuple(
+        re.compile(
+            r"<w:%s[ >](?:(?!<w:%s[ >]).)*({#)%s ([^}#]*(?:#})).*?</w:%s>"
+            % (y, y, y, y),
+            re.DOTALL,
+        )
+        for y in ("tr", "tc", "p")
+    )
+
     # Precompiled pattern for fast detection of any Jinja syntax in a string.
     # Used in render() to skip header/footer processing when no tags are present.
     _JINJA_PATTERN = re.compile(r'\{\{|\{%|\{#')
@@ -229,25 +249,15 @@ def cellbg(m):
         # -%} will merge with next paragraph text
         src_xml = self._RE_MERGE_NEXT.sub("%}", src_xml)
 
-        for y in ["tr", "tc", "p", "r"]:
-            # replace into xml code the row/paragraph/run containing
-            # {%y xxx %} or {{y xxx}} template tag
-            # by {% xxx %} or {{ xx }} without any surrounding <w:y> tags :
-            # This is mandatory to have jinja2 generating correct xml code
-            pat = (
-                r"<w:%(y)s[ >](?:(?!<w:%(y)s[ >]).)*({%%|{{)%(y)s ([^}%%]*(?:%%}|}})).*?</w:%(y)s>"
-                % {"y": y}
-            )
-            src_xml = re.sub(pat, r"\1 \2", src_xml, flags=re.DOTALL)
-
-        for y in ["tr", "tc", "p"]:
-            # same thing, but for {#y xxx #} (but not where y == 'r', since that
-            # makes less sense to use comments in that context
-            pat = (
-                r"<w:%(y)s[ >](?:(?!<w:%(y)s[ >]).)*({#)%(y)s ([^}#]*(?:#})).*?</w:%(y)s>"
-                % {"y": y}
-            )
-            src_xml = re.sub(pat, r"\1 \2", src_xml, flags=re.DOTALL)
+        # Strip surrounding <w:y> tags from {%y ...%} / {{y ...}} template tags.
+        # This is mandatory for jinja2 to generate correct xml code.
+        # Patterns are pre-compiled as class attributes to avoid recompilation.
+        for pat in self._RE_TAG_STRIP:
+            src_xml = pat.sub(r"\1 \2", src_xml)
+
+        # Same for {#y ...#} comment tags (not 'r' — comments in runs are uncommon).
+        for pat in self._RE_COMMENT_STRIP:
+            src_xml = pat.sub(r"\1 \2", src_xml)
 
         # add vMerge
         # use {% vm %} to make this table cell and its copies

From c042ae27b8e0ce093dfd662f9e33838c25bada58 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 13:42:09 +0100
Subject: [PATCH 11/22] Remove unused imports from template.py

Clean up docxtpl/template.py by removing unused imports: functools, logging, and Template from jinja2. Keeps Environment and meta from jinja2 and does not change runtime behavior; this reduces linter warnings and unnecessary dependencies.
---
 docxtpl/template.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 66f963c..59cb62c 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -8,7 +8,6 @@
 
 from os import PathLike
 from typing import TYPE_CHECKING, Any, Optional, IO, Union, Dict, Set
-import functools
 import io
 from lxml import etree
 from docx import Document
@@ -18,8 +17,7 @@
 from docx.oxml import OxmlElement
 from docx.oxml.ns import qn
 from docx.opc.constants import RELATIONSHIP_TYPE as REL_TYPE
-import logging
-from jinja2 import Environment, Template, meta
+from jinja2 import Environment, meta
 from jinja2.exceptions import TemplateError
 
 

From ac57d571c32a3613ffe9f9192c8235cf78d818d4 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 13:48:58 +0100
Subject: [PATCH 12/22] Clarify header/footer fallback comment

Update comment in docxtpl/template.py to clarify the fallback behavior when processing headers and footers. The comment now explains the fallback guards against unexpected part structure (e.g. blob is None or missing attributes) rather than implying it handles malformed XML; malformed XML would still fail in build_headers_footers_xml. No functional change.
---
 docxtpl/template.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 59cb62c..abcff49 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -601,7 +601,7 @@ def render(
 
         # Headers & Footers - skip when no Jinja tags are present.
         # Uses both _JINJA_PATTERN (intact tags) and _RE_JINJA_OPEN (tags
-        # split across XML runs by Word). Falls back to full render on error.
+        # split across XML runs by Word).
         for uri in (self.HEADER_URI, self.FOOTER_URI):
             try:
                 has_jinja = any(
@@ -616,8 +616,9 @@ def render(
                     for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                         self.map_headers_footers_xml(relKey, xml)
             except Exception:
-                # Fallback: if the fast-path check raises (e.g. malformed XML
-                # in a part), process all headers/footers unconditionally.
+                # Fallback: guards against unexpected part structure (e.g. blob
+                # is None, missing attributes). Not malformed XML - that would
+                # fail in build_headers_footers_xml regardless.
                 for relKey, xml in self.build_headers_footers_xml(context, uri, jinja_env):
                     self.map_headers_footers_xml(relKey, xml)
 

From 8d486128ec9be06b265b6c8124f7b8b839540662 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 15:58:26 +0100
Subject: [PATCH 13/22] Prebuild and cache inline image XML

Avoid calling python-docx per-image: generate a CT_Inline-based XML template once (by rendering it with sentinel values and replacing those sentinels with format placeholders, which keeps the template compatible with the installed python-docx) and fill the placeholders with str.format() for each image. Add caching of generated image XML per (part, descriptor, width, height) to skip repeated I/O, SHA1 work and header parsing. Use package.get_or_add_image_part and relate_to with RT.IMAGE, compute scaled_dimensions, assign shape_id from docx_ids_index, and xml-escape filenames. Also add a _image_cache dict on DocxTemplate and adjust hyperlink handling to use the local part variable.
---
 docxtpl/inline_image.py | 98 ++++++++++++++++++++++++++++++++++++++---
 docxtpl/template.py     |  1 +
 2 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index f860749..781976b 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -4,8 +4,62 @@
 
 @author: Eric Lapouyade
 """
+from xml.sax.saxutils import escape as xml_escape
+
+from docx.opc.constants import RELATIONSHIP_TYPE as RT
 from docx.oxml import OxmlElement, parse_xml
 from docx.oxml.ns import qn
+from docx.oxml.shape import CT_Inline
+from docx.shared import Emu
+
+
+def _build_inline_image_xml_template():
+    """Generate the XML format string by calling python-docx with sentinel values.
+
+    This ensures the template always matches the installed python-docx version's
+    XML structure, even after upgrades. We call CT_Inline.new_pic_inline() once
+    with recognizable sentinel values, serialize to XML, then replace the
+    sentinels with Python format placeholders.
+    """
+    import uuid
+
+    # Use GUIDs for string sentinels - guaranteed no collision with XML content
+    _RID_SENTINEL = str(uuid.uuid4())
+    _FILENAME_SENTINEL = str(uuid.uuid4())
+
+    # For numeric sentinels, use unique integers derived from UUIDs.
+    # shape_id is xsd:unsignedInt (max 4,294,967,295 / 32-bit).
+    # cx/cy are EMU values typed as xsd:long (64-bit).
+    # All use 9-digit range [100000000, 999999999] to stay within 32-bit
+    # and avoid any accidental collisions with each other.
+    _SHAPE_ID = uuid.uuid4().int % (9 * 10**8) + 10**8
+    _CX_INT = uuid.uuid4().int % (9 * 10**8) + 10**8
+    _CY_INT = uuid.uuid4().int % (9 * 10**8) + 10**8
+
+    inline = CT_Inline.new_pic_inline(
+        _SHAPE_ID,
+        _RID_SENTINEL,
+        _FILENAME_SENTINEL,
+        Emu(_CX_INT),
+        Emu(_CY_INT),
+    )
+    xml = inline.xml
+
+    # Replace sentinel values with format string placeholders
+    xml = xml.replace(str(_SHAPE_ID), "{shape_id}")
+    xml = xml.replace(_RID_SENTINEL, "{rId}")
+    xml = xml.replace(_FILENAME_SENTINEL, "{filename}")
+    xml = xml.replace(str(_CX_INT), "{cx}")
+    xml = xml.replace(str(_CY_INT), "{cy}")
+
+    return xml
+
+
+# Pre-built XML template for inline images, derived from the installed
+# python-docx version. Using str.format() on this template avoids calling
+# CT_Inline.new_pic_inline() per image (which does 2x parse_xml() +
+# element manipulation + .xml serialization each time).
+_INLINE_IMAGE_XML = _build_inline_image_xml_template()
 
 
 class InlineImage(object):
@@ -50,16 +104,48 @@ def _add_hyperlink(self, run, url, part):
         return run
 
     def _insert_image(self):
-        pic = self.tpl.current_rendering_part.new_pic_inline(
-            self.image_descriptor,
-            self.width,
-            self.height,
-        ).xml
+        part = self.tpl.current_rendering_part
+        image_descriptor = self.image_descriptor
+
+        # Cache generated XML per (part, descriptor, width, height) to avoid
+        # repeated file I/O, SHA1 computation, and header parsing.
+        cache = self.tpl._image_cache
+        cache_key = (id(part), image_descriptor, self.width, self.height)
+
+        if cache_key in cache:
+            pic = cache[cache_key]
+        else:
+            # Get or add the image part (handles deduplication via SHA1 internally)
+            package = part._package
+            image_part = package.get_or_add_image_part(image_descriptor)
+            rId = part.relate_to(image_part, RT.IMAGE)
+            image = image_part.image
+            cx, cy = image.scaled_dimensions(self.width, self.height)
+
+            # Assign shape_id from a simple counter. python-docx's
+            # new_pic_inline() would call its next_id property which does an
+            # XPath("//@id") over the entire XML tree on every call - but we
+            # bypass that entirely by generating the XML ourselves.
+            # fix_docpr_ids() renumbers all IDs after rendering anyway.
+            self.tpl.docx_ids_index += 1
+            shape_id = self.tpl.docx_ids_index
+
+            # Generate XML directly as a string using a pre-built template
+            # rather than calling CT_Inline.new_pic_inline() per image.
+            pic = _INLINE_IMAGE_XML.format(
+                cx=int(cx),
+                cy=int(cy),
+                shape_id=shape_id,
+                filename=xml_escape(image.filename),
+                rId=rId,
+            )
+            cache[cache_key] = pic
+
         if self.anchor:
             run = parse_xml(pic)
             if run.xpath(".//a:blip"):
                 hyperlink = self._add_hyperlink(
-                    run, self.anchor, self.tpl.current_rendering_part
+                    run, self.anchor, part
                 )
                 pic = hyperlink.xml
 
diff --git a/docxtpl/template.py b/docxtpl/template.py
index abcff49..a0d325b 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -171,6 +171,7 @@ def render_init(self):
         self.pic_map = {}
         self.current_rendering_part = None
         self.docx_ids_index = 1000
+        self._image_cache = {}
         self.is_saved = False
 
     def __getattr__(self, name):

From ddf1687f9dbf592199e4aff6cc52e541455d3616 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 16:25:48 +0100
Subject: [PATCH 14/22] Optimize image part deduplication

Add an O(1) SHA1 index for image parts and a fast _get_or_add_image_part helper on DocxTemplate to avoid python-docx's O(n) linear scan and repeated SHA1 recomputation. Initialize the index from render_init() (via _init_image_parts_index), seed it from existing image parts, and maintain a sequential partname counter to prevent partname collisions. Update InlineImage to call tpl._get_or_add_image_part (which returns (image_part, image)) instead of package.get_or_add_image_part, and use the returned Image object. This improves performance and reduces redundant SHA1 work when inserting/looking up images.
---
 docxtpl/inline_image.py |  7 +++---
 docxtpl/template.py     | 55 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 781976b..7d353ad 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -115,11 +115,10 @@ def _insert_image(self):
         if cache_key in cache:
             pic = cache[cache_key]
         else:
-            # Get or add the image part (handles deduplication via SHA1 internally)
-            package = part._package
-            image_part = package.get_or_add_image_part(image_descriptor)
+            # Get or add the image part with O(1) SHA1 deduplication,
+            # avoiding the O(n) linear scan and SHA1 recomputation per lookup.
+            image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
-            image = image_part.image
             cx, cy = image.scaled_dimensions(self.width, self.height)
 
             # Assign shape_id from a simple counter. python-docx's
diff --git a/docxtpl/template.py b/docxtpl/template.py
index a0d325b..69eb2f7 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -173,6 +173,61 @@ def render_init(self):
         self.docx_ids_index = 1000
         self._image_cache = {}
         self.is_saved = False
+        self._init_image_parts_index()
+
+    def _init_image_parts_index(self):
+        """Build an O(1) SHA1 index of existing image parts in the package.
+
+        This enables fast deduplication in _get_or_add_image_part(), avoiding
+        the O(n) linear scan and repeated SHA1 recomputation that occurs in
+        the default python-docx image-part lookup.
+        """
+        package = self.docx._part._package
+        image_parts = package.image_parts
+
+        # Seed the index from existing image parts in the template.
+        # ImagePart.sha1 recomputes on each access, but this is a one-time
+        # cost for the (typically few) images already in the template.
+        self._image_sha1_index = {}
+        for ip in image_parts:
+            self._image_sha1_index[ip.sha1] = ip
+
+        # Start the partname counter after all existing image parts to avoid
+        # collisions with partnames already in the package.
+        self._image_part_counter = len(image_parts._image_parts)
+
+    def _get_or_add_image_part(self, image_descriptor):
+        """Return (image_part, image) for the given image_descriptor.
+
+        Performs the same function as python-docx's
+        Package.get_or_add_image_part() but with O(1) SHA1 deduplication
+        (instead of O(n) linear scan with repeated SHA1 recomputation) and
+        sequential partname assignment (instead of O(n²) gap-search).
+        """
+        from docx.image.image import Image
+        from docx.opc.packuri import PackURI
+        from docx.parts.image import ImagePart
+
+        image = Image.from_file(image_descriptor)
+        sha1 = image.sha1  # @lazyproperty — computed once per Image object
+
+        image_part = self._image_sha1_index.get(sha1)
+        if image_part is not None:
+            return image_part, image
+
+        # New unique image — create part with sequential partname
+        self._image_part_counter += 1
+        partname = PackURI(
+            "/word/media/image%d.%s" % (self._image_part_counter, image.ext)
+        )
+        image_part = ImagePart.from_image(image, partname)
+
+        # Add to the package collection and the SHA1 index
+        package = self.docx._part._package
+        package.image_parts.append(image_part)
+        self._image_sha1_index[sha1] = image_part
+
+        return image_part, image
 
     def __getattr__(self, name):
         return getattr(self.docx, name)

From 4a96bc4b5b505812a15736538b98e9cf140299e0 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 16:51:28 +0100
Subject: [PATCH 15/22] Use descriptor cache for image deduplication

Replace the SHA1-based image-part index with a descriptor-keyed cache (_image_descriptor_index) to deduplicate images by file-path (O(1)) and avoid expensive SHA1 hashing. For string path descriptors the cache is used to return existing (image_part, image) tuples; non-string descriptors (e.g. file-like objects) fall back to always creating a new part. Keeps sequential partname assignment and appends new ImagePart to the package; caches the result for string descriptors. This improves performance when adding many images (e.g. large photos) by eliminating repeated SHA1 computation.
---
 docxtpl/inline_image.py |  6 +++---
 docxtpl/template.py     | 47 +++++++++++++++++++++--------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 7d353ad..3c69168 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -108,15 +108,15 @@ def _insert_image(self):
         image_descriptor = self.image_descriptor
 
         # Cache generated XML per (part, descriptor, width, height) to avoid
-        # repeated file I/O, SHA1 computation, and header parsing.
+        # repeated file I/O, image hashing, and header parsing.
         cache = self.tpl._image_cache
         cache_key = (id(part), image_descriptor, self.width, self.height)
 
         if cache_key in cache:
             pic = cache[cache_key]
         else:
-            # Get or add the image part with O(1) SHA1 deduplication,
-            # avoiding the O(n) linear scan and SHA1 recomputation per lookup.
+            # Get or add the image part with O(1) descriptor-based dedup,
+            # avoiding the O(n) linear scan in python-docx's default path.
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
diff --git a/docxtpl/template.py b/docxtpl/template.py
index 69eb2f7..9e9faaf 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -176,21 +176,17 @@ def render_init(self):
         self._init_image_parts_index()
 
     def _init_image_parts_index(self):
-        """Build an O(1) SHA1 index of existing image parts in the package.
+        """Initialize image-part tracking for fast insertion.
 
-        This enables fast deduplication in _get_or_add_image_part(), avoiding
-        the O(n) linear scan and repeated SHA1 recomputation that occurs in
-        the default python-docx image-part lookup.
+        Uses a descriptor-keyed cache (file path string) for O(1) dedup of
+        images added during rendering, avoiding expensive content hashing.
         """
         package = self.docx._part._package
         image_parts = package.image_parts
 
-        # Seed the index from existing image parts in the template.
-        # ImagePart.sha1 recomputes on each access, but this is a one-time
-        # cost for the (typically few) images already in the template.
-        self._image_sha1_index = {}
-        for ip in image_parts:
-            self._image_sha1_index[ip.sha1] = ip
+        # Descriptor-keyed cache: maps image_descriptor -> (image_part, image)
+        # This is the primary dedup mechanism and avoids expensive content hashing.
+        self._image_descriptor_index = {}
 
         # Start the partname counter after all existing image parts to avoid
         # collisions with partnames already in the package.
@@ -199,35 +195,40 @@ def _init_image_parts_index(self):
     def _get_or_add_image_part(self, image_descriptor):
         """Return (image_part, image) for the given image_descriptor.
 
-        Performs the same function as python-docx's
-        Package.get_or_add_image_part() but with O(1) SHA1 deduplication
-        (instead of O(n) linear scan with repeated SHA1 recomputation) and
-        sequential partname assignment (instead of O(n²) gap-search).
+        Uses the descriptor itself (file path) as the dedup key, avoiding
+        expensive content hashing.  Falls back to always creating a new part
+        for non-hashable descriptors (file-like objects).
         """
         from docx.image.image import Image
         from docx.opc.packuri import PackURI
         from docx.parts.image import ImagePart
 
-        image = Image.from_file(image_descriptor)
-        sha1 = image.sha1  # @lazyproperty — computed once per Image object
+        # For string paths, use the path as a cheap dedup key.
+        cache_key = image_descriptor if isinstance(image_descriptor, str) else None
+
+        if cache_key is not None:
+            cached = self._image_descriptor_index.get(cache_key)
+            if cached is not None:
+                return cached
 
-        image_part = self._image_sha1_index.get(sha1)
-        if image_part is not None:
-            return image_part, image
+        image = Image.from_file(image_descriptor)
 
-        # New unique image — create part with sequential partname
+        # Create image part with sequential partname
         self._image_part_counter += 1
         partname = PackURI(
             "/word/media/image%d.%s" % (self._image_part_counter, image.ext)
         )
         image_part = ImagePart.from_image(image, partname)
 
-        # Add to the package collection and the SHA1 index
+        # Add to the package collection
         package = self.docx._part._package
         package.image_parts.append(image_part)
-        self._image_sha1_index[sha1] = image_part
 
-        return image_part, image
+        result = (image_part, image)
+        if cache_key is not None:
+            self._image_descriptor_index[cache_key] = result
+
+        return result
 
     def __getattr__(self, name):
         return getattr(self.docx, name)

From 98d8aba7b63b2f20be808d1017701eaf2665a324 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:39:45 +0100
Subject: [PATCH 16/22] Cache image metadata instead of XML

Cache only the expensive image metadata (rId, dimensions, filename) per (part, descriptor, width, height) instead of the full inline XML. A fresh shape_id is now assigned for every insertion so drawing IDs remain unique (important for headers/footers/footnotes which aren't renumbered by fix_docpr_ids()). This preserves performance benefits (avoids repeated image part lookup, hashing and header parsing) while preventing duplicate drawing IDs; cx/cy are stored as ints and filename is xml-escaped when cached.
---
 docxtpl/inline_image.py | 45 +++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 3c69168..10441f0 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -107,38 +107,39 @@ def _insert_image(self):
         part = self.tpl.current_rendering_part
         image_descriptor = self.image_descriptor
 
-        # Cache generated XML per (part, descriptor, width, height) to avoid
-        # repeated file I/O, image hashing, and header parsing.
+        # Cache the expensive parts (image part lookup, rId, dimensions) per
+        # (part, descriptor, width, height).  The XML string itself is NOT
+        # cached because each insertion needs a unique shape_id - header/footer
+        # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
         cache_key = (id(part), image_descriptor, self.width, self.height)
 
         if cache_key in cache:
-            pic = cache[cache_key]
+            rId, cx, cy, filename = cache[cache_key]
         else:
             # Get or add the image part with O(1) descriptor-based dedup,
             # avoiding the O(n) linear scan in python-docx's default path.
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-
-            # Assign shape_id from a simple counter. python-docx's
-            # new_pic_inline() would call its next_id property which does an
-            # XPath("//@id") over the entire XML tree on every call - but we
-            # bypass that entirely by generating the XML ourselves.
-            # fix_docpr_ids() renumbers all IDs after rendering anyway.
-            self.tpl.docx_ids_index += 1
-            shape_id = self.tpl.docx_ids_index
-
-            # Generate XML directly as a string using a pre-built template
-            # rather than calling CT_Inline.new_pic_inline() per image.
-            pic = _INLINE_IMAGE_XML.format(
-                cx=int(cx),
-                cy=int(cy),
-                shape_id=shape_id,
-                filename=xml_escape(image.filename),
-                rId=rId,
-            )
-            cache[cache_key] = pic
+            filename = xml_escape(image.filename)
+            cache[cache_key] = (rId, int(cx), int(cy), filename)
+
+        # Always assign a fresh shape_id per insertion so that drawing IDs
+        # are unique in every part (including headers/footers/footnotes
+        # which are not renumbered by fix_docpr_ids()).
+        self.tpl.docx_ids_index += 1
+        shape_id = self.tpl.docx_ids_index
+
+        # Generate XML directly as a string using a pre-built template
+        # rather than calling CT_Inline.new_pic_inline() per image.
+        pic = _INLINE_IMAGE_XML.format(
+            cx=int(cx),
+            cy=int(cy),
+            shape_id=shape_id,
+            filename=filename,
+            rId=rId,
+        )
 
         if self.anchor:
             run = parse_xml(pic)

From e4886535593541d6ee86443d28334dee12dd11a4 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:46:16 +0100
Subject: [PATCH 17/22] Handle non-hashable descriptors; escape quotes

Use id() for non-string image descriptors (e.g. file-like objects) when building the image cache key, to avoid a TypeError on dict lookup if the descriptor is unhashable. Also escape double quotes in image filenames for XML attribute usage by passing a mapping to xml_escape so quotes become &quot;. Cache semantics and per-insertion shape_id assignment are otherwise unchanged.
---
 docxtpl/inline_image.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 10441f0..3a207be 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -112,7 +112,10 @@ def _insert_image(self):
         # cached because each insertion needs a unique shape_id - header/footer
         # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
-        cache_key = (id(part), image_descriptor, self.width, self.height)
+        # Use id() for non-hashable descriptors (file-like objects) to avoid
+        # TypeError on dict lookup.
+        desc_key = image_descriptor if isinstance(image_descriptor, str) else id(image_descriptor)
+        cache_key = (id(part), desc_key, self.width, self.height)
 
         if cache_key in cache:
             rId, cx, cy, filename = cache[cache_key]
@@ -122,7 +125,8 @@ def _insert_image(self):
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-            filename = xml_escape(image.filename)
+            # Escape for use inside XML attribute (quotes must be escaped)
+            filename = xml_escape(image.filename, {'"': "&quot;"})
             cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs

From 7c52c563f74e6ed7ed631213d6b0d126cffeecd3 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:49:15 +0100
Subject: [PATCH 18/22] Scan image partnames to derive counter

Avoid using len() of image parts to pick the next image partname index, which could collide when numbering is non-contiguous. Instead scan existing image partnames (using partname.baseURI when available, otherwise str(partname)), extract numeric suffixes with a regex (/image(\d+)\.), track the maximum index, and set the image part counter to that max. This ensures new image partnames won't reuse an already-present index.
---
 docxtpl/template.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 9e9faaf..c63a2d3 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -188,9 +188,19 @@ def _init_image_parts_index(self):
         # This is the primary dedup mechanism and avoids expensive content hashing.
         self._image_descriptor_index = {}
 
-        # Start the partname counter after all existing image parts to avoid
-        # collisions with partnames already in the package.
-        self._image_part_counter = len(image_parts._image_parts)
+        # Derive the next partname index by scanning existing partnames once.
+        # Using len() alone would collide with non-contiguous numbering
+        # (e.g. image1.png + image3.png → len=2 → next would be image3.ext).
+        max_index = 0
+        for ip in image_parts:
+            # Partnames follow /word/media/imageN.ext pattern
+            name = ip.partname.baseURI if hasattr(ip.partname, 'baseURI') else str(ip.partname)
+            m = re.search(r'/image(\d+)\.', name)
+            if m:
+                idx = int(m.group(1))
+                if idx > max_index:
+                    max_index = idx
+        self._image_part_counter = max_index
 
     def _get_or_add_image_part(self, image_descriptor):
         """Return (image_part, image) for the given image_descriptor.

From 7581a333ec77046ab7e87bffeac0e78defd9b82d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:55:58 +0100
Subject: [PATCH 19/22] Always use str(partname) for image parts

Replace conditional use of partname.baseURI with a direct str(partname) conversion when iterating image parts. This makes the code rely on a consistent string representation for part names (used by the /imageN.ext regex) and avoids depending on the presence of a baseURI attribute across different part implementations.
---
 docxtpl/template.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index c63a2d3..078d172 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -194,7 +194,7 @@ def _init_image_parts_index(self):
         max_index = 0
         for ip in image_parts:
             # Partnames follow /word/media/imageN.ext pattern
-            name = ip.partname.baseURI if hasattr(ip.partname, 'baseURI') else str(ip.partname)
+            name = str(ip.partname)
             m = re.search(r'/image(\d+)\.', name)
             if m:
                 idx = int(m.group(1))

From 82fd69c73314c005654a84998cd802964d0c1f8d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 17:59:06 +0100
Subject: [PATCH 20/22] Initialize docx_ids_index from existing docPr ids

Replace the hardcoded docx_ids_index initialization with a routine that scans all package parts (body, headers, footers, footnotes) for wp:docPr elements and sets the counter to the maximum found id (minimum 1000); because the counter is incremented before each use, newly assigned ids start above that maximum. This prevents id collisions when inserting new drawings into parts that were not renumbered by fix_docpr_ids. The new method is called during initialization and safely skips non-XML or unreadable parts.
---
 docxtpl/template.py | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/docxtpl/template.py b/docxtpl/template.py
index 078d172..cf339df 100644
--- a/docxtpl/template.py
+++ b/docxtpl/template.py
@@ -170,10 +170,48 @@ def render_init(self):
         self.init_docx()
         self.pic_map = {}
         self.current_rendering_part = None
-        self.docx_ids_index = 1000
         self._image_cache = {}
         self.is_saved = False
         self._init_image_parts_index()
+        self._init_docx_ids_index()
+
+    def _init_docx_ids_index(self):
+        """Set docx_ids_index above the maximum existing wp:docPr id.
+
+        fix_docpr_ids() only renumbers the body tree, so IDs in headers,
+        footers, and footnotes retain their original values. Starting the
+        counter above the global maximum prevents collisions when inserting
+        new drawings into any part.
+        """
+        import docx.oxml.ns as _ns
+        wp_ns = _ns.nsmap['wp']
+        tag = "{%s}docPr" % wp_ns
+        max_id = 0
+
+        # Scan all parts (body + headers + footers + footnotes)
+        for part in self.docx._part._package.parts:
+            if not hasattr(part, 'blob') or part.blob is None:
+                continue
+            # Only scan XML parts that could contain drawings
+            ct = getattr(part, 'content_type', '')
+            if not ct.startswith('application/vnd.openxmlformats-officedocument'):
+                continue
+            try:
+                tree = etree.fromstring(part.blob)
+            except Exception:
+                continue
+            for elt in tree.iter(tag):
+                id_val = elt.get('id')
+                if id_val is not None:
+                    try:
+                        val = int(id_val)
+                        if val > max_id:
+                            max_id = val
+                    except ValueError:
+                        pass
+
+        # Start above the highest existing ID (minimum 1000 for safety)
+        self.docx_ids_index = max(max_id, 1000)
 
     def _init_image_parts_index(self):
         """Initialize image-part tracking for fast insertion.

From ef56632b1938690db98ee9b5cf2c2fe7a7eb34e4 Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 18:05:16 +0100
Subject: [PATCH 21/22] Normalize None image filename before escaping

Treat a None image.filename (e.g., for BytesIO/file-like descriptors) as an empty string before calling xml_escape so XML attribute generation matches python-docx behavior. Add a clarifying comment and ensure the escaped filename is stored in the cache to avoid None-related issues when rendering.
---
 docxtpl/inline_image.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 3a207be..684b5ce 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -125,8 +125,10 @@ def _insert_image(self):
             image_part, image = self.tpl._get_or_add_image_part(image_descriptor)
             rId = part.relate_to(image_part, RT.IMAGE)
             cx, cy = image.scaled_dimensions(self.width, self.height)
-            # Escape for use inside XML attribute (quotes must be escaped)
-            filename = xml_escape(image.filename, {'"': "&quot;"})
+            # Escape for use inside XML attribute (quotes must be escaped).
+            # image.filename is None for file-like descriptors (BytesIO);
+            # normalize to empty string to match python-docx's behavior.
+            filename = xml_escape(image.filename or "", {'"': "&quot;"})
             cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs

From f316ca8a4b944ce83ea96cdb6990559a71d23f8d Mon Sep 17 00:00:00 2001
From: Jack Byrne <46843566+JackByrne@users.noreply.github.com>
Date: Mon, 18 May 2026 18:15:13 +0100
Subject: [PATCH 22/22] Skip caching unhashable image descriptors

Only build and use a cache key when the image_descriptor is hashable. Previously id() was used for all non-string descriptors (e.g. file-like objects), which risks aliasing if the object is garbage-collected and its id is reused, leading to incorrect deduplication. Now the code attempts to construct a cache key with the descriptor itself and falls back to skipping caching for unhashable descriptors; cache entries are only read/written when a valid cache_key exists. Filename normalization and per-insertion shape_id behavior are unchanged.
---
 docxtpl/inline_image.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/docxtpl/inline_image.py b/docxtpl/inline_image.py
index 684b5ce..da35bbd 100644
--- a/docxtpl/inline_image.py
+++ b/docxtpl/inline_image.py
@@ -112,12 +112,16 @@ def _insert_image(self):
         # cached because each insertion needs a unique shape_id - header/footer
         # and footnote parts are not renumbered by fix_docpr_ids().
         cache = self.tpl._image_cache
-        # Use id() for non-hashable descriptors (file-like objects) to avoid
-        # TypeError on dict lookup.
-        desc_key = image_descriptor if isinstance(image_descriptor, str) else id(image_descriptor)
-        cache_key = (id(part), desc_key, self.width, self.height)
-
-        if cache_key in cache:
+        # For hashable descriptors (strings, paths), cache by value.
+        # For unhashable descriptors (file-like objects), skip caching
+        # entirely — using id() would risk aliasing after GC.
+        try:
+            cache_key = (id(part), image_descriptor, self.width, self.height)
+            hash(cache_key) is not None  # trigger TypeError if unhashable
+        except TypeError:
+            cache_key = None
+
+        if cache_key is not None and cache_key in cache:
             rId, cx, cy, filename = cache[cache_key]
         else:
             # Get or add the image part with O(1) descriptor-based dedup,
@@ -129,7 +133,8 @@ def _insert_image(self):
             # image.filename is None for file-like descriptors (BytesIO);
             # normalize to empty string to match python-docx's behavior.
             filename = xml_escape(image.filename or "", {'"': "&quot;"})
-            cache[cache_key] = (rId, int(cx), int(cy), filename)
+            if cache_key is not None:
+                cache[cache_key] = (rId, int(cx), int(cy), filename)
 
         # Always assign a fresh shape_id per insertion so that drawing IDs
         # are unique in every part (including headers/footers/footnotes