From 805e37d3df548f4025561c3af987bdf97a32177c Mon Sep 17 00:00:00 2001 From: degenfabian Date: Tue, 19 Aug 2025 02:45:06 +0200 Subject: [PATCH 1/4] updating loading in qwen demo to use transformer bridge --- demos/Qwen.ipynb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/demos/Qwen.ipynb b/demos/Qwen.ipynb index fba5144ae..e2afc9b9c 100644 --- a/demos/Qwen.ipynb +++ b/demos/Qwen.ipynb @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -171,7 +171,7 @@ "torch.set_grad_enabled(False)\n", "\n", "from transformers import AutoTokenizer\n", - "from transformer_lens import HookedTransformer\n", + "from transformer_lens.model_bridge import TransformerBridge\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from transformers.generation import GenerationConfig\n", "\n", @@ -208,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -283,12 +283,13 @@ " trust_remote_code = True\n", ").eval()\n", "\n", - "tl_model = HookedTransformer.from_pretrained_no_processing(\n", + "tl_model = TransformerBridge.boot_transformers(\n", " model_path,\n", " device=device,\n", " fp32=True,\n", " dtype=torch.float32,\n", ").to(device)\n", + "tl_model.enable_compatibility_mode()\n", "\n", "assert_hf_and_tl_model_are_close(hf_model, tl_model, tokenizer)" ] @@ -302,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -335,11 +336,12 @@ " device_map=device,\n", ").eval()\n", "\n", - "tl_model = HookedTransformer.from_pretrained_no_processing(\n", + "tl_model = TransformerBridge.boot_transformers(\n", " model_path,\n", " device=device,\n", " dtype=torch.float32,\n", ").to(device)\n", + "tl_model.enable_compatibility_mode()\n", "\n", "assert_hf_and_tl_model_are_close(hf_model, tl_model, tokenizer)" ] From abcd7c16e5c34dd9fcf2be7b1a24bcd9cbb22a52 Mon Sep 17 00:00:00 2001 From: degenfabian Date: Tue, 19 Aug 2025 17:59:50 +0200 Subject: [PATCH 2/4] add qwen demo to CI --- .github/workflows/checks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 4de51026c..646e921d0 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -159,6 +159,7 @@ jobs: # - "No_Position_Experiment" - "Othello_GPT" - "Patchscopes_Generation_Demo" + - "Qwen" # - "T5" steps: - uses: actions/checkout@v3 From 0a6a13f8c605a1307c3e359bca0629b733dfbf01 Mon Sep 17 00:00:00 2001 From: jlarson4 Date: Wed, 4 Mar 2026 16:39:44 -0600 Subject: [PATCH 3/4] Updating Qwen Notebook for TransformerLens 3.x --- demos/Qwen.ipynb | 273 ++++-------------- .../supported_architectures/gemma2.py | 4 +- .../gemma3_multimodal.py | 4 +- .../supported_architectures/llama.py | 4 +- .../supported_architectures/qwen2.py | 4 +- .../supported_architectures/qwen3.py | 4 +- 6 files changed, 63 insertions(+), 230 deletions(-) diff --git a/demos/Qwen.ipynb b/demos/Qwen.ipynb index 09ef6c231..f707d3307 100644 --- a/demos/Qwen.ipynb +++ b/demos/Qwen.ipynb @@ -2,109 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: transformers_stream_generator in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.0.4)\n", - "Requirement already satisfied: plotly in /root/TransformerLens/.venv/lib/python3.10/site-packages (5.18.0)\n", - "Requirement already satisfied: circuitsvis in /root/TransformerLens/.venv/lib/python3.10/site-packages (1.43.2)\n", - "Requirement already satisfied: huggingface_hub in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.20.2)\n", - "Requirement already satisfied: einops in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.7.0)\n", - "Requirement already satisfied: tiktoken in /root/TransformerLens/.venv/lib/python3.10/site-packages (0.5.2)\n", - "Requirement already satisfied: datasets in /root/TransformerLens/.venv/lib/python3.10/site-packages (2.14.4)\n", - "Requirement already satisfied: transformers>=4.26.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers_stream_generator) (4.37.2)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from plotly) (8.2.3)\n", - "Requirement already satisfied: packaging in /root/TransformerLens/.venv/lib/python3.10/site-packages (from plotly) (23.2)\n", - "Requirement already satisfied: importlib-metadata>=5.1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (7.0.1)\n", - "Requirement already satisfied: numpy>=1.24 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (1.26.3)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.3.1)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.18.1)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (12.1.105)\n", - "Requirement already satisfied: torch>=1.10 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.1.2)\n", - "Requirement already satisfied: triton==2.1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from circuitsvis) (2.1.0)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->circuitsvis) (12.3.101)\n", - "Requirement already satisfied: filelock in /root/TransformerLens/.venv/lib/python3.10/site-packages (from triton==2.1.0->circuitsvis) (3.13.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (2023.12.2)\n", - "Requirement already satisfied: requests in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.66.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from huggingface_hub) (4.9.0)\n", - "Requirement already satisfied: regex>=2022.1.18 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from tiktoken) (2023.12.25)\n", - "Requirement already satisfied: pyarrow>=8.0.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (14.0.2)\n", - "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (0.3.7)\n", - "Requirement already satisfied: pandas in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (2.0.3)\n", - "Requirement already satisfied: xxhash in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (3.4.1)\n", - "Requirement already satisfied: multiprocess in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (0.70.15)\n", - "Requirement already satisfied: aiohttp in /root/TransformerLens/.venv/lib/python3.10/site-packages (from datasets) (3.9.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (23.2.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.4)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.3)\n", - "Requirement already satisfied: zipp>=0.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from importlib-metadata>=5.1.0->circuitsvis) (3.17.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (3.6)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2.1.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from requests->huggingface_hub) (2023.11.17)\n", - "Requirement already satisfied: sympy in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (1.12)\n", - "Requirement already satisfied: networkx in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (3.1)\n", - "Requirement already satisfied: jinja2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from torch>=1.10->circuitsvis) (3.1.2)\n", - "Requirement already satisfied: tokenizers<0.19,>=0.14 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers>=4.26.1->transformers_stream_generator) (0.15.0)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from transformers>=4.26.1->transformers_stream_generator) (0.4.1)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.3.post1)\n", - "Requirement already satisfied: tzdata>=2022.1 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from pandas->datasets) (2023.4)\n", - "Requirement already satisfied: six>=1.5 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from jinja2->torch>=1.10->circuitsvis) (2.1.3)\n", - "Requirement already satisfied: mpmath>=0.19 in /root/TransformerLens/.venv/lib/python3.10/site-packages (from sympy->torch>=1.10->circuitsvis) (1.3.0)\n", - "\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m24.0\u001B[0m\n", - "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", "%pip install transformers_stream_generator plotly circuitsvis huggingface_hub einops tiktoken datasets" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Running as a Jupyter notebook - intended for development only!\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_13850/410710250.py:21: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"load_ext autoreload\")\n", - "/tmp/ipykernel_13850/410710250.py:22: DeprecationWarning: `magic(...)` is deprecated since IPython 0.13 (warning added in 8.1), use run_line_magic(magic_name, parameter_s).\n", - " ipython.magic(\"autoreload 2\")\n" - ] - } - ], + "outputs": [], "source": [ + "# NBVAL_IGNORE_OUTPUT\n", "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", "DEVELOPMENT_MODE = False\n", "try:\n", @@ -131,17 +43,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using renderer: colab\n" - ] - } - ], + "outputs": [], "source": [ "# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n", "import plotly.io as pio\n", @@ -156,19 +60,10 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/root/TransformerLens\n" - ] - } - ], + "outputs": [], "source": [ - "%cd ~/TransformerLens\n", - "import torch\n", "# NBVAL_IGNORE_OUTPUT\n", + "import torch\n", "_ = torch.set_grad_enabled(False)\n", "\n", "from transformers import AutoTokenizer\n", @@ -195,7 +90,8 @@ " prompt_toks = tokenizer(prompt, return_tensors=\"pt\").input_ids\n", "\n", " hf_logits = hf_model(prompt_toks.to(hf_model.device)).logits\n", - " tl_logits = tl_model(prompt_toks).to(hf_logits)\n", + " tl_device = next(tl_model.parameters()).device\n", + " tl_logits = tl_model(prompt_toks.to(tl_device)).to(hf_logits)\n", "\n", " assert torch.allclose(torch.softmax(hf_logits, dim=-1), torch.softmax(tl_logits, dim=-1), atol=atol)" ] @@ -204,95 +100,48 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Qwen, first generation" + "## Qwen, first generation\n", + "\n", + "> **Note:** Qwen gen1 (`Qwen/Qwen-1_8B-Chat`) requires `trust_remote_code=True` and depends on\n", + "> `transformers_stream_generator`, which is incompatible with `transformers>=5.0.0`\n", + "> (`DisjunctiveConstraint` was removed). This section is commented out until the upstream\n", + "> dependency is updated." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Your device support faster inference by passing bf16=True in \"AutoModelForCausalLM.from_pretrained\".\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2cffaf8715b64623b6799822d7cf1cfe", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/2 [00:00 None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py b/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py index ca9210896..6c9e71941 100644 --- a/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py +++ b/transformer_lens/model_bridge/supported_architectures/gemma3_multimodal.py @@ -80,13 +80,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/llama.py b/transformer_lens/model_bridge/supported_architectures/llama.py index 7c9ae0a72..acba3bcab 100644 --- a/transformer_lens/model_bridge/supported_architectures/llama.py +++ b/transformer_lens/model_bridge/supported_architectures/llama.py @@ -77,13 +77,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/qwen2.py b/transformer_lens/model_bridge/supported_architectures/qwen2.py index 8a905e7c0..4a1b37153 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen2.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen2.py @@ -62,13 +62,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( diff --git a/transformer_lens/model_bridge/supported_architectures/qwen3.py b/transformer_lens/model_bridge/supported_architectures/qwen3.py index 31a871b3c..55da0bc03 100644 --- a/transformer_lens/model_bridge/supported_architectures/qwen3.py +++ b/transformer_lens/model_bridge/supported_architectures/qwen3.py @@ -63,13 +63,13 @@ def __init__(self, cfg: Any) -> None: "blocks.{i}.attn.k.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.v.weight": ParamProcessingConversion( tensor_conversion=RearrangeTensorConversion( "(n h) m -> n m h", - n=getattr(self.cfg, "n_key_value_heads", self.cfg.n_heads), + n=getattr(self.cfg, "n_key_value_heads", None) or self.cfg.n_heads, ), ), "blocks.{i}.attn.o.weight": ParamProcessingConversion( From 8b485d3d4bca03c9cf5c52d196cf54b8d9528006 Mon Sep 17 00:00:00 2001 From: jlarson4 Date: Wed, 4 Mar 2026 18:43:26 -0600 Subject: [PATCH 4/4] Changing model to fit in CI --- demos/Qwen.ipynb | 101 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 88 insertions(+), 13 deletions(-) diff --git a/demos/Qwen.ipynb b/demos/Qwen.ipynb index f707d3307..1f4eb2c94 100644 --- a/demos/Qwen.ipynb +++ b/demos/Qwen.ipynb @@ -2,9 +2,18 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/jlarson/Documents/PROJECTS/TransformerLens/.venv/bin/python: No module named pip\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "%pip install transformers_stream_generator plotly circuitsvis huggingface_hub einops tiktoken datasets" @@ -12,9 +21,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running as a Jupyter notebook - intended for development only!\n" + ] + } + ], "source": [ "# NBVAL_IGNORE_OUTPUT\n", "# Janky code to do different setup when run in a Colab notebook vs VSCode\n", @@ -43,9 +60,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using renderer: colab\n" + ] + } + ], "source": [ "# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n", "import plotly.io as pio\n", @@ -58,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -148,17 +173,67 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Qwen, new generation" + "## Qwen2" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "00c489dffa18461f932645808546a319", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading weights: 0%| | 0/290 [00:00