From 3b0899ff8089e12d59a1b54f9b97c39c526ed5d8 Mon Sep 17 00:00:00 2001
From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com>
Date: Thu, 12 Feb 2026 00:15:30 +0530
Subject: [PATCH] fix: replace mutable default arguments with None

Using mutable default arguments (e.g. `def f(x=[])`) is a well-known
Python anti-pattern: the default object is created once, when the
function is defined, and is shared across all calls, so any in-place
mutation leaks state between invocations in subtle, hard-to-debug ways.

Replace mutable default lists with `None` and create a fresh empty list
inside the function body when the argument is omitted, in:

- `InternLM2ForCausalLM.build_inputs` (history=[])
- `InternLM2ForCausalLM.chat` (history=[])
- `InternLM2ForCausalLM.stream_chat` (history=[])
- `build_vtab_dataset` (classnames=[])
- `contains_quantity_word` (special_keep_words=[])
---
 clip_benchmark/clip_benchmark/datasets/builder.py    |  4 +++-
 internvl_chat/eval/mathvista/utilities.py            |  4 +++-
 .../internvl/model/internlm2/modeling_internlm2.py   | 12 +++++++++---
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/clip_benchmark/clip_benchmark/datasets/builder.py b/clip_benchmark/clip_benchmark/datasets/builder.py
index ed47f89a7..fa631fe03 100644
--- a/clip_benchmark/clip_benchmark/datasets/builder.py
+++ b/clip_benchmark/clip_benchmark/datasets/builder.py
@@ -519,7 +519,9 @@ def has_kaggle():
     return call('which kaggle', shell=True) == 0
 
 
-def build_vtab_dataset(dataset_name, transform, download=True, split='test', data_dir='root', classnames=[]):
+def build_vtab_dataset(dataset_name, transform, download=True, split='test', data_dir='root', classnames=None):
+    if classnames is None:
+        classnames = []
     # Using VTAB splits instead of default TFDS splits
     from .tfds import (VTABIterableDataset, disable_gpus_on_tensorflow,
                        download_tfds_dataset)
diff --git a/internvl_chat/eval/mathvista/utilities.py b/internvl_chat/eval/mathvista/utilities.py
index 148460634..c474e3c95 100644
--- a/internvl_chat/eval/mathvista/utilities.py
+++ b/internvl_chat/eval/mathvista/utilities.py
@@ -83,7 +83,9 @@ def contains_number_word(text):
     return False  # If none of the words could be converted to a number, return False
 
 
-def contains_quantity_word(text, special_keep_words=[]):
+def contains_quantity_word(text, special_keep_words=None):
+    if special_keep_words is None:
+        special_keep_words = []
     # check if text contains a quantity word
     quantity_words = ['most', 'least', 'fewest'
                                        'more', 'less', 'fewer',
diff --git a/internvl_chat/internvl/model/internlm2/modeling_internlm2.py b/internvl_chat/internvl/model/internlm2/modeling_internlm2.py
index 569513dff..d7efd64d2 100644
--- a/internvl_chat/internvl/model/internlm2/modeling_internlm2.py
+++ b/internvl_chat/internvl/model/internlm2/modeling_internlm2.py
@@ -1171,7 +1171,9 @@ def _reorder_cache(past_key_values, beam_idx):
             )
         return reordered_past
 
-    def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = [], meta_instruction=''):
+    def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, meta_instruction=''):
+        if history is None:
+            history = []
         if tokenizer.add_bos_token:
             prompt = ''
         else:
@@ -1188,7 +1190,7 @@ def chat(
         self,
         tokenizer,
         query: str,
-        history: List[Tuple[str, str]] = [],
+        history: List[Tuple[str, str]] = None,
         streamer: Optional[BaseStreamer] = None,
         max_new_tokens: int = 1024,
         do_sample: bool = True,
@@ -1199,6 +1201,8 @@ def chat(
                                 '- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.',
         **kwargs,
     ):
+        if history is None:
+            history = []
         inputs = self.build_inputs(tokenizer, query, history, meta_instruction)
         inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)}
         # also add end-of-assistant token in eos token id to avoid unnecessary generation
@@ -1224,7 +1228,7 @@ def stream_chat(
         self,
         tokenizer,
         query: str,
-        history: List[Tuple[str, str]] = [],
+        history: List[Tuple[str, str]] = None,
         max_new_tokens: int = 1024,
         do_sample: bool = True,
         temperature: float = 0.8,
@@ -1237,6 +1241,8 @@ def stream_chat(
         ('你好，有什么可以帮助您的吗', [('你好', '你好，有什么可以帮助您的吗')])
         ('你好，有什么可以帮助您的吗？', [('你好', '你好，有什么可以帮助您的吗？')])
         """
+        if history is None:
+            history = []
         if BaseStreamer is None:
             raise ModuleNotFoundError(
                 'The version of `transformers` is too low. Please make sure '