From 3b0899ff8089e12d59a1b54f9b97c39c526ed5d8 Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:15:30 +0530 Subject: [PATCH] fix: replace mutable default arguments with None Using mutable default arguments (e.g. `def f(x=[])`) is a well-known Python anti-pattern because the default object is shared across all calls, leading to subtle and hard-to-debug state leakage between invocations. Replace mutable default lists with `None` and initialize inside the function body in: - `InternLM2ForCausalLM.build_inputs` (history=[]) - `InternLM2ForCausalLM.chat` (history=[]) - `InternLM2ForCausalLM.stream_chat` (history=[]) - `build_vtab_dataset` (classnames=[]) - `contains_quantity_word` (special_keep_words=[]) --- clip_benchmark/clip_benchmark/datasets/builder.py | 4 +++- internvl_chat/eval/mathvista/utilities.py | 4 +++- .../internvl/model/internlm2/modeling_internlm2.py | 12 +++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/clip_benchmark/clip_benchmark/datasets/builder.py b/clip_benchmark/clip_benchmark/datasets/builder.py index ed47f89a7..fa631fe03 100644 --- a/clip_benchmark/clip_benchmark/datasets/builder.py +++ b/clip_benchmark/clip_benchmark/datasets/builder.py @@ -519,7 +519,9 @@ def has_kaggle(): return call('which kaggle', shell=True) == 0 -def build_vtab_dataset(dataset_name, transform, download=True, split='test', data_dir='root', classnames=[]): +def build_vtab_dataset(dataset_name, transform, download=True, split='test', data_dir='root', classnames=None): + if classnames is None: + classnames = [] # Using VTAB splits instead of default TFDS splits from .tfds import (VTABIterableDataset, disable_gpus_on_tensorflow, download_tfds_dataset) diff --git a/internvl_chat/eval/mathvista/utilities.py b/internvl_chat/eval/mathvista/utilities.py index 148460634..c474e3c95 100644 --- a/internvl_chat/eval/mathvista/utilities.py +++ b/internvl_chat/eval/mathvista/utilities.py @@ -83,7 +83,9 @@ def contains_number_word(text): return False # If none of the words could be converted to a number, return False -def contains_quantity_word(text, special_keep_words=[]): +def contains_quantity_word(text, special_keep_words=None): + if special_keep_words is None: + special_keep_words = [] # check if text contains a quantity word quantity_words = ['most', 'least', 'fewest' 'more', 'less', 'fewer', diff --git a/internvl_chat/internvl/model/internlm2/modeling_internlm2.py b/internvl_chat/internvl/model/internlm2/modeling_internlm2.py index 569513dff..d7efd64d2 100644 --- a/internvl_chat/internvl/model/internlm2/modeling_internlm2.py +++ b/internvl_chat/internvl/model/internlm2/modeling_internlm2.py @@ -1171,7 +1171,9 @@ def _reorder_cache(past_key_values, beam_idx): ) return reordered_past - def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = [], meta_instruction=''): + def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, meta_instruction=''): + if history is None: + history = [] if tokenizer.add_bos_token: prompt = '' else: @@ -1188,7 +1190,7 @@ def chat( self, tokenizer, query: str, - history: List[Tuple[str, str]] = [], + history: List[Tuple[str, str]] = None, streamer: Optional[BaseStreamer] = None, max_new_tokens: int = 1024, do_sample: bool = True, @@ -1199,6 +1201,8 @@ def chat( '- InternLM (书生·浦语) can understand and communicate fluently in the language chosen by the user such as English and 中文.', **kwargs, ): + if history is None: + history = [] inputs = self.build_inputs(tokenizer, query, history, meta_instruction) inputs = {k: v.to(self.device) for k, v in inputs.items() if torch.is_tensor(v)} # also add end-of-assistant token in eos token id to avoid unnecessary generation @@ -1224,7 +1228,7 @@ def stream_chat( self, tokenizer, query: str, - history: List[Tuple[str, str]] = [], + history: List[Tuple[str, str]] = None, max_new_tokens: int = 1024, do_sample: bool = True, temperature: float = 0.8, @@ -1237,6 +1241,8 @@ def stream_chat( ('你好,有什么可以帮助您的吗', [('你好', '你好,有什么可以帮助您的吗')]) ('你好,有什么可以帮助您的吗?', [('你好', '你好,有什么可以帮助您的吗?')]) """ + if history is None: + history = [] if BaseStreamer is None: raise ModuleNotFoundError( 'The version of `transformers` is too low. Please make sure '