From 8936e412c9b216ed3c9a739525e6bd11fba60adf Mon Sep 17 00:00:00 2001
From: Roger Wang <hey@rogerw.io>
Date: Thu, 4 Jun 2026 00:18:19 -0700
Subject: [PATCH] fix: strip <mm:think> reasoning token in scenario check

MiniMax-M3 wraps its chain-of-thought in native <mm:think>...</mm:think>
tags (both <think> and <mm:think> are special tokens in the M3 tokenizer)
and carries it inline in `content`.
ScenarioCheckValidator._get_visible_content only stripped <think>...</think>,
so the parameter-order check read the order from inside the reasoning
instead of the visible answer, causing spurious Scenario-Check-Pass-Rate
failures on M3. Strip both tag forms.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 validator/scenario_check.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/validator/scenario_check.py b/validator/scenario_check.py
index 4df9b14..e3f70de 100644
--- a/validator/scenario_check.py
+++ b/validator/scenario_check.py
@@ -34,8 +34,8 @@ def _extract_expected_order(request: dict) -> Optional[List[str]]:
 
     @staticmethod
     def _get_visible_content(text: str) -> str:
-        """Strip <think>...</think> blocks to get visible reply only."""
-        return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
+        """Strip <think>/<mm:think> reasoning blocks to get visible reply only."""
+        return re.sub(r"<(?:mm:)?think>.*?</(?:mm:)?think>", "", text, flags=re.DOTALL).strip()
 
     @staticmethod
     def _extract_actual_order(text: str, expected: list[str]) -> list[str]: