From 57df4794f7694a38c6ef364cf29793f590f31044 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:30:23 +0000 Subject: [PATCH 1/2] Initial plan From f6e581ba25de6b4975e5614cfb092b0a281c3fc3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:44:24 +0000 Subject: [PATCH 2/2] Add gpt-5.4 tokenizer mapping Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com> --- src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs | 2 ++ test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs index 8963a0c180..a460135c22 100644 --- a/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs +++ b/src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs @@ -1044,6 +1044,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo ( "o4-mini-", ModelEncoding.O200kBase ), // e.g. o4-mini // chat + ( "gpt-5.4-", ModelEncoding.O200kBase ), ( "gpt-5.3-", ModelEncoding.O200kBase ), ( "gpt-5.2-", ModelEncoding.O200kBase ), ( "gpt-5.1-", ModelEncoding.O200kBase ), @@ -1074,6 +1075,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo { "o4-mini", ModelEncoding.O200kBase }, // chat + { "gpt-5.4", ModelEncoding.O200kBase }, { "gpt-5.3", ModelEncoding.O200kBase }, { "gpt-5.2", ModelEncoding.O200kBase }, { "gpt-5.1", ModelEncoding.O200kBase }, diff --git a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs index 059c6e6035..cc3814143b 100644 --- a/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs +++ b/test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs @@ -39,6 +39,7 @@ public class TiktokenTests public static Tokenizer GPT5_1 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.1"); public static Tokenizer GPT5_2 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.2"); public static Tokenizer GPT5_3 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.3"); + public static Tokenizer GPT5_4 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.4"); public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4"); public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b"); @@ -424,6 +425,8 @@ public void TestEncodeR50kBase() [InlineData("gpt-5.2-mini")] [InlineData("gpt-5.3")] [InlineData("gpt-5.3-mini")] + [InlineData("gpt-5.4")] + [InlineData("gpt-5.4-nano")] [InlineData("chatgpt-4o-")] [InlineData("gpt-4")] [InlineData("gpt-4-")] @@ -545,6 +548,7 @@ public void TestEncodingNamesNegativeCases() [InlineData("gpt-5.1")] [InlineData("gpt-5.2")] [InlineData("gpt-5.3")] + [InlineData("gpt-5.4")] [InlineData("o1")] [InlineData("o3")] [InlineData("o4-mini")] @@ -924,4 +928,3 @@ public void TestLargeInputConsistency(int length) } } } -