From b3a862b87cf7b0222040cd575028ef3203b4adea Mon Sep 17 00:00:00 2001 From: Russole <850905junior@gmail.com> Date: Sat, 23 May 2026 10:02:29 +0800 Subject: [PATCH 1/3] HDDS-15348. OmMultipartPartKeyCodec should not use UTF8.decode(..) --- .../hadoop/ozone/om/helpers/OmMultipartPartKey.java | 11 ++++++++++- .../ozone/om/helpers/TestOmMultipartPartKey.java | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java index 92b71e471908..294d35b507fc 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java @@ -20,9 +20,11 @@ import jakarta.annotation.Nonnull; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import java.util.Objects; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CodecBuffer; +import org.apache.hadoop.hdds.utils.db.StringCodec; /** * Typed key for multipart parts table. @@ -184,7 +186,14 @@ private OmMultipartPartKey fromByteBuffer(ByteBuffer rawData) final ByteBuffer uploadIdBuffer = input.duplicate(); uploadIdBuffer.limit(separatorIndex); uploadIdBuffer.position(start); - String uploadId = StandardCharsets.UTF_8.decode(uploadIdBuffer).toString(); + byte[] uploadIdBytes = new byte[uploadIdBuffer.remaining()]; + uploadIdBuffer.get(uploadIdBytes); + String uploadId = StringCodec.get().fromPersistedFormat(uploadIdBytes); + if (!Arrays.equals(uploadIdBytes, + uploadId.getBytes(StandardCharsets.UTF_8))) { + throw new IllegalArgumentException( + "Invalid multipart part key: malformed UTF-8 uploadId"); + } if (suffixLength == 0) { return prefix(uploadId); } diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java index 309f39a9aa47..2a52a4d5122f 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java @@ -131,6 +131,13 @@ public void testDecodeRejectsInvalidKeyWithoutSeparator() { () -> codec.fromPersistedFormat("invalid".getBytes(UTF_8))); } + @Test + public void testDecodeRejectsMalformedUtf8UploadId() { + byte[] malformed = new byte[] {(byte) 0xC3, (byte) '/', 0, 0, 0, 1}; + assertThrows(IllegalArgumentException.class, + () -> codec.fromPersistedFormat(malformed)); + } + @Test public void testDecodeRejectsEmptyKey() { assertThrows(IllegalArgumentException.class, From 6b105f3b364ff2ecdd5e26d72ef58f6ce9dbd9f5 Mon Sep 17 00:00:00 2001 From: Russole <850905junior@gmail.com> Date: Thu, 28 May 2026 01:42:26 +0800 Subject: [PATCH 2/3] Throw CodecException in OmMultipartPartKeyCodec --- .../ozone/om/helpers/OmMultipartPartKey.java | 30 ++++++++----------- .../om/helpers/TestOmMultipartPartKey.java | 13 ++++---- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java index 294d35b507fc..5cdbeca1c170 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java @@ -20,10 +20,10 @@ import jakarta.annotation.Nonnull; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; -import java.util.Arrays; import java.util.Objects; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CodecBuffer; +import org.apache.hadoop.hdds.utils.db.CodecException; import org.apache.hadoop.hdds.utils.db.StringCodec; /** @@ -127,7 +127,7 @@ public CodecBuffer toCodecBuffer( @Override public OmMultipartPartKey fromCodecBuffer(@Nonnull CodecBuffer buffer) - throws IllegalArgumentException { + throws CodecException { return fromByteBuffer(buffer.asReadOnlyByteBuffer()); } @@ -157,20 +157,20 @@ public byte[] toPersistedFormat(OmMultipartPartKey key) { * Decodes the raw byte array from the key/value store into an OmMultipartPartKey object. * @param rawData Byte array from the key/value store. Should not be null. * @return OmMultipartPartKey object represented by the raw byte array. - * @throws IllegalArgumentException if the rawData format is invalid + * @throws CodecException if the rawData format is invalid */ @Override - public OmMultipartPartKey fromPersistedFormat(byte[] rawData) throws IllegalArgumentException { + public OmMultipartPartKey fromPersistedFormat(byte[] rawData) throws CodecException { return fromByteBuffer(ByteBuffer.wrap(rawData)); } private OmMultipartPartKey fromByteBuffer(ByteBuffer rawData) - throws IllegalArgumentException { + throws CodecException { final ByteBuffer input = rawData.asReadOnlyBuffer(); final int start = input.position(); final int length = input.remaining(); if (length == 0) { - throw new IllegalArgumentException( + throw new CodecException( "Invalid multipart part key: empty key"); } @@ -180,7 +180,7 @@ private OmMultipartPartKey fromByteBuffer(ByteBuffer rawData) int separatorIndex = start + length - suffixLength - 1; if (separatorIndex < start) { - throw new IllegalArgumentException( + throw new CodecException( "Invalid multipart part key: invalid separator position"); } final ByteBuffer uploadIdBuffer = input.duplicate(); @@ -188,17 +188,13 @@ private OmMultipartPartKey fromByteBuffer(ByteBuffer rawData) uploadIdBuffer.position(start); byte[] uploadIdBytes = new byte[uploadIdBuffer.remaining()]; uploadIdBuffer.get(uploadIdBytes); - String uploadId = StringCodec.get().fromPersistedFormat(uploadIdBytes); - if (!Arrays.equals(uploadIdBytes, - uploadId.getBytes(StandardCharsets.UTF_8))) { - throw new IllegalArgumentException( - "Invalid multipart part key: malformed UTF-8 uploadId"); - } + String uploadId = StringCodec.getCodecNoFallback() + .fromPersistedFormat(uploadIdBytes); if (suffixLength == 0) { return prefix(uploadId); } if (start + length - (separatorIndex + 1) != Integer.BYTES) { - throw new IllegalArgumentException( + throw new CodecException( "Invalid multipart part key: unexpected part suffix length"); } int part = input.getInt(separatorIndex + 1); @@ -220,10 +216,10 @@ public OmMultipartPartKey copyObject(OmMultipartPartKey object) { * @param start the position where key bytes start * @param length the number of bytes in the key * @return the length of the suffix (0 for prefix keys, Integer.BYTES for full keys) - * @throws IllegalArgumentException if the key format is invalid (missing separator or unexpected suffix length) + * @throws CodecException if the key format is invalid (missing separator or unexpected suffix length) */ private static int getSuffixLength(ByteBuffer rawData, int start, int length) - throws IllegalArgumentException { + throws CodecException { int suffixLength = -1; // Check full-key layout first. Otherwise, part numbers whose low byte is // '/' (for example 47 -> 0x0000002f) are mis-classified as prefix keys. @@ -234,7 +230,7 @@ private static int getSuffixLength(ByteBuffer rawData, int start, int length) suffixLength = 0; } if (suffixLength < 0) { - throw new IllegalArgumentException( + throw new CodecException( "Invalid multipart part key: missing separator"); } return suffixLength; diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java index 2a52a4d5122f..8c1992bdf677 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java @@ -26,6 +26,7 @@ import java.util.stream.IntStream; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CodecBuffer; +import org.apache.hadoop.hdds.utils.db.CodecException; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -127,27 +128,27 @@ public void testDecodeFullKeyWhenPartLowByteIsSeparator(int partNumber) @Test public void testDecodeRejectsInvalidKeyWithoutSeparator() { - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromPersistedFormat("invalid".getBytes(UTF_8))); } @Test public void testDecodeRejectsMalformedUtf8UploadId() { byte[] malformed = new byte[] {(byte) 0xC3, (byte) '/', 0, 0, 0, 1}; - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromPersistedFormat(malformed)); } @Test public void testDecodeRejectsEmptyKey() { - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromPersistedFormat(new byte[0])); } @Test public void testCodecBufferDecodeRejectsInvalidKeyWithoutSeparator() { try (CodecBuffer buffer = CodecBuffer.wrap("invalid".getBytes(UTF_8))) { - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromCodecBuffer(buffer)); } } @@ -155,7 +156,7 @@ public void testCodecBufferDecodeRejectsInvalidKeyWithoutSeparator() { @Test public void testCodecBufferDecodeRejectsEmptyKey() { try (CodecBuffer buffer = CodecBuffer.wrap(new byte[0])) { - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromCodecBuffer(buffer)); } } @@ -163,7 +164,7 @@ public void testCodecBufferDecodeRejectsEmptyKey() { @Test public void testDecodeRejectsMalformedKeyWithMiddleSeparatorOnly() { byte[] malformed = "up/xx".getBytes(UTF_8); - assertThrows(IllegalArgumentException.class, + assertThrows(CodecException.class, () -> codec.fromPersistedFormat(malformed)); } From 73851ff6b1680b89093fdd318c3f8d709b50c785 Mon Sep 17 00:00:00 2001 From: Russole <850905junior@gmail.com> Date: Fri, 29 May 2026 01:07:32 +0800 Subject: [PATCH 3/3] Use strict StringCodec in OmMultipartPartKeyCodec --- .../hadoop/ozone/om/helpers/OmMultipartPartKey.java | 12 +++++++----- .../ozone/om/helpers/TestOmMultipartPartKey.java | 11 +++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java index 5cdbeca1c170..86fa6fe31e2a 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OmMultipartPartKey.java @@ -19,7 +19,6 @@ import jakarta.annotation.Nonnull; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.util.Objects; import org.apache.hadoop.hdds.utils.db.Codec; import org.apache.hadoop.hdds.utils.db.CodecBuffer; @@ -113,8 +112,10 @@ public boolean supportCodecBuffer() { @Override public CodecBuffer toCodecBuffer( - @Nonnull OmMultipartPartKey key, CodecBuffer.Allocator allocator) { - byte[] uploadBytes = key.uploadId.getBytes(StandardCharsets.UTF_8); + @Nonnull OmMultipartPartKey key, CodecBuffer.Allocator allocator) + throws CodecException { + byte[] uploadBytes = StringCodec.getCodecNoFallback() + .toPersistedFormat(key.uploadId); int size = uploadBytes.length + 1 + (key.hasPartNumber() ? Integer.BYTES : 0); CodecBuffer buffer = allocator.apply(size); @@ -140,8 +141,9 @@ public OmMultipartPartKey fromCodecBuffer(@Nonnull CodecBuffer buffer) * @return Byte array representation of the object for storage in the key/value store. */ @Override - public byte[] toPersistedFormat(OmMultipartPartKey key) { - byte[] uploadBytes = key.uploadId.getBytes(StandardCharsets.UTF_8); + public byte[] toPersistedFormat(OmMultipartPartKey key) throws CodecException { + byte[] uploadBytes = StringCodec.getCodecNoFallback() + .toPersistedFormat(key.uploadId); int size = uploadBytes.length + 1 + (key.hasPartNumber() ? Integer.BYTES : 0); ByteBuffer buffer = ByteBuffer.allocate(size); diff --git a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java index 8c1992bdf677..2144f1c14b23 100644 --- a/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java +++ b/hadoop-ozone/common/src/test/java/org/apache/hadoop/ozone/om/helpers/TestOmMultipartPartKey.java @@ -209,4 +209,15 @@ public void testUploadIdContainingSlashRoundTrips() throws Exception { assertEquals("upload/with/slashes", decoded.getUploadId()); assertEquals(5, decoded.getPartNumber().intValue()); } + + @Test + public void testEncodeRejectsMalformedUploadId() { + OmMultipartPartKey key = OmMultipartPartKey.of("bad-\uD800", 1); + + assertThrows(CodecException.class, + () -> codec.toPersistedFormat(key)); + + assertThrows(CodecException.class, + () -> codec.toHeapCodecBuffer(key)); + } }