From 282a469a71b668bb48e4b1898025c6daa6c24b36 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Mon, 25 May 2026 15:16:05 +0800 Subject: [PATCH 1/9] wip Signed-off-by: peterxcli --- .../ratis/client/DataStreamClientRpc.java | 8 + .../ratis/client/api/DataStreamApi.java | 11 ++ .../ratis/client/api/DataStreamInput.java | 44 +++++ .../ratis/client/impl/ClientProtoUtils.java | 17 +- .../client/impl/DataStreamClientImpl.java | 111 ++++++++++- .../impl/DataStreamReplyByteBuf.java | 174 +++++++++++++++++ .../impl/DataStreamReplyByteBuffers.java | 181 ++++++++++++++++++ .../ratis/protocol/DataStreamReply.java | 8 +- .../ratis/netty/NettyDataStreamUtils.java | 41 +++- .../netty/client/NettyClientStreamRpc.java | 132 ++++++++++++- .../netty/server/DataStreamManagement.java | 103 ++++++++++ .../netty/server/NettyServerStreamRpc.java | 19 +- .../org/apache/ratis/server/RaftServer.java | 15 ++ .../ratis/statemachine/StateMachine.java | 47 +++++ .../ratis/server/impl/RaftServerImpl.java | 50 ++++- .../ratis/server/impl/RaftServerProxy.java | 7 + .../datastream/DataStreamClusterTests.java | 45 +++++ .../ratis/datastream/DataStreamTestUtils.java | 22 +++ 18 files changed, 1016 insertions(+), 19 deletions(-) create mode 100644 ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java create mode 100644 ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java create mode 100644 ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java diff --git a/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java b/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java index a9bcd9d58a..fd5bd9538e 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java @@ -24,6 +24,7 @@ import java.io.Closeable; import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; /** * A client interface for sending stream requests. @@ -36,4 +37,11 @@ default CompletableFuture streamAsync(DataStreamRequest request throw new UnsupportedOperationException(getClass() + " does not support " + JavaUtils.getCurrentStackTraceElement().getMethodName()); } + + /** Async call to send a request and receive multiple replies for the request. */ + default CompletableFuture streamAsync( + DataStreamRequest request, Consumer replyConsumer) { + throw new UnsupportedOperationException(getClass() + " does not support " + + JavaUtils.getCurrentStackTraceElement().getMethodName()); + } } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java index 9e5e2438cb..85f237e664 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java @@ -50,4 +50,15 @@ default DataStreamOutput stream() { /** Create a stream by providing a customized header message and route table. */ DataStreamOutput stream(ByteBuffer headerMessage, RoutingTable routingTable); + + /** + * Create a stream to read data for readonly requests. + * This corresponds to {@link AsyncApi#sendReadOnly(org.apache.ratis.protocol.Message)}. + */ + default DataStreamInput streamReadOnly() { + return streamReadOnly(null); + } + + /** Create a stream by providing a customized header message for readonly requests. */ + DataStreamInput streamReadOnly(ByteBuffer message); } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java new file mode 100644 index 0000000000..4f56aa9d57 --- /dev/null +++ b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.client.api; + +import org.apache.ratis.protocol.DataStreamReply; +import org.apache.ratis.protocol.RaftClientReply; + +import java.io.Closeable; +import java.util.concurrent.CompletableFuture; + +/** + * An asynchronous input stream supporting zero buffer copying. + */ +public interface DataStreamInput extends Closeable { + /** + * Read the next chunk in the stream asynchronously. + * + * @return a future of the reply. + */ + CompletableFuture readAsync(); + + /** + * Return the future of the {@link RaftClientReply} + * which will be received once the read-only stream has received a reply. + * + * @return the future of the {@link RaftClientReply}. + */ + CompletableFuture getRaftClientReplyFuture(); +} diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java index d2146a521f..f6c06bd323 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java @@ -18,6 +18,7 @@ package org.apache.ratis.client.impl; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; import org.apache.ratis.proto.RaftProtos.AlreadyClosedExceptionProto; import org.apache.ratis.proto.RaftProtos.ClientMessageEntryProto; import org.apache.ratis.proto.RaftProtos.GroupAddRequestProto; @@ -378,22 +379,24 @@ static GroupInfoReplyProto toGroupInfoReplyProto(GroupInfoReply reply) { return b.build(); } - static RaftClientReply getRaftClientReply(DataStreamReply reply) { - if (!(reply instanceof DataStreamReplyByteBuffer)) { - throw new IllegalStateException("Unexpected " + reply.getClass() + ": reply is " + reply); - } + public static RaftClientReply getRaftClientReply(DataStreamReply reply) { try { - return toRaftClientReply(((DataStreamReplyByteBuffer) reply).slice()); + if (reply instanceof DataStreamReplyByteBuffer) { + return toRaftClientReply(((DataStreamReplyByteBuffer) reply).slice()); + } else if (reply instanceof DataStreamReplyByteBuf) { + return toRaftClientReply(((DataStreamReplyByteBuf) reply).slice().nioBuffer()); + } + throw new IllegalStateException("Unexpected " + reply.getClass() + ": reply is " + reply); } catch (InvalidProtocolBufferException e) { throw new IllegalStateException("Failed to getRaftClientReply from " + reply, e); } } - static RaftClientReply toRaftClientReply(ByteBuffer buffer) throws InvalidProtocolBufferException { + public static RaftClientReply toRaftClientReply(ByteBuffer buffer) throws InvalidProtocolBufferException { return toRaftClientReply(RaftClientReplyProto.parseFrom(buffer)); } - static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) { + public static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) { final RaftRpcReplyProto rp = replyProto.getRpcReply(); final RaftGroupMemberId serverMemberId = ProtoUtils.toRaftGroupMemberId(rp.getReplyId(), rp.getRaftGroupId()); diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java index 313131cbda..82d6859194 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java @@ -23,9 +23,11 @@ import org.apache.ratis.client.DataStreamClientRpc; import org.apache.ratis.client.DataStreamOutputRpc; import org.apache.ratis.client.RaftClient; +import org.apache.ratis.client.api.DataStreamInput; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamPacketByteBuffer; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.io.FilePositionCount; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -34,16 +36,17 @@ import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.DataStreamRequestHeader; +import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; +import org.apache.ratis.protocol.RoutingTable; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.rpc.CallId; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.IOUtils; -import org.apache.ratis.protocol.*; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; @@ -54,10 +57,12 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; +import java.util.ArrayDeque; import java.util.Arrays; import java.util.Collections; import java.util.Objects; import java.util.Optional; +import java.util.Queue; import java.util.concurrent.CompletableFuture; /** @@ -237,6 +242,93 @@ private CompletableFuture sendForward(DataStreamReply writeRepl } } + public final class DataStreamInputImpl implements DataStreamInput { + private final RaftClientRequest header; + private final CompletableFuture replyFuture; + private final CompletableFuture raftClientReplyFuture = new CompletableFuture<>(); + private final Queue replies = new ArrayDeque<>(); + private final Queue> pendingReads = new ArrayDeque<>(); + private Throwable readException; + private boolean closed; + + private DataStreamInputImpl(RaftClientRequest request) { + this.header = request; + final ByteBuffer buffer = ClientProtoUtils.toRaftClientRequestProtoByteBuffer(header); + final DataStreamRequestHeader h = new DataStreamRequestHeader(header.getClientId(), Type.STREAM_HEADER, + header.getCallId(), 0, buffer.remaining(), StandardWriteOption.FLUSH, StandardWriteOption.CLOSE); + this.replyFuture = dataStreamClientRpc.streamAsync(new DataStreamRequestByteBuffer(h, buffer), this::receive); + replyFuture.thenApply(ClientProtoUtils::getRaftClientReply) + .whenComplete(JavaUtils.asBiConsumer(raftClientReplyFuture)); + replyFuture.whenComplete((reply, exception) -> { + if (exception != null) { + failReads(exception); + } + }); + } + + private void receive(DataStreamReply reply) { + final CompletableFuture pending; + synchronized (this) { + if (closed) { + reply.release(); + return; + } + pending = pendingReads.poll(); + if (pending == null) { + replies.add(reply); + return; + } + } + pending.complete(reply); + } + + private void failReads(Throwable t) { + for (;;) { + final CompletableFuture pending; + synchronized (this) { + readException = t; + pending = pendingReads.poll(); + if (pending == null) { + return; + } + } + pending.completeExceptionally(t); + } + } + + @Override + public synchronized CompletableFuture readAsync() { + if (closed) { + return JavaUtils.completeExceptionally(new AlreadyClosedException( + clientId + ": stream already closed, request=" + header)); + } + final DataStreamReply reply = replies.poll(); + if (reply != null) { + return CompletableFuture.completedFuture(reply); + } + if (readException != null) { + return JavaUtils.completeExceptionally(readException); + } + final CompletableFuture f = new CompletableFuture<>(); + pendingReads.add(f); + return f; + } + + @Override + public CompletableFuture getRaftClientReplyFuture() { + return raftClientReplyFuture; + } + + @Override + public synchronized void close() { + closed = true; + for (DataStreamReply reply; (reply = replies.poll()) != null;) { + reply.release(); + } + failReads(new AlreadyClosedException(clientId + ": stream already closed, request=" + header)); + } + } + @Override public DataStreamClientRpc getClientRpc() { return dataStreamClientRpc; @@ -274,6 +366,21 @@ public DataStreamOutputRpc stream(ByteBuffer headerMessage, RoutingTable routing return new DataStreamOutputImpl(request); } + @Override + public DataStreamInput streamReadOnly(ByteBuffer headerMessage) { + final Message message = + Optional.ofNullable(headerMessage).map(ByteString::copyFrom).map(Message::valueOf).orElse(null); + final RaftClientRequest request = RaftClientRequest.newBuilder() + .setClientId(clientId) + .setServerId(dataStreamServer.getId()) + .setGroupId(groupId) + .setCallId(CallId.getAndIncrement()) + .setMessage(message) + .setType(RaftClientRequest.readRequestType()) + .build(); + return new DataStreamInputImpl(request); + } + @Override public void close() throws IOException { dataStreamClientRpc.close(); diff --git a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java new file mode 100644 index 0000000000..d94d3f0c33 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.datastream.impl; + +import org.apache.ratis.proto.RaftProtos.CommitInfoProto; +import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.DataStreamPacket; +import org.apache.ratis.protocol.DataStreamReply; +import org.apache.ratis.protocol.DataStreamReplyHeader; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; +import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; + +import java.util.Collection; +import java.util.Collections; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Implements {@link DataStreamReply} with {@link ByteBuf}. + */ +public final class DataStreamReplyByteBuf extends DataStreamPacketImpl implements DataStreamReply { + public static final class Builder { + private ClientId clientId; + private Type type; + private long streamId; + private long streamOffset; + private ByteBuf buffer; + + private boolean success; + private long bytesWritten; + private Collection commitInfos; + + private Builder() { + } + + public Builder setClientId(ClientId clientId) { + this.clientId = clientId; + return this; + } + + public Builder setType(Type type) { + this.type = type; + return this; + } + + public Builder setStreamId(long streamId) { + this.streamId = streamId; + return this; + } + + public Builder setStreamOffset(long streamOffset) { + this.streamOffset = streamOffset; + return this; + } + + public Builder setBuffer(ByteBuf buffer) { + this.buffer = buffer; + return this; + } + + public Builder setSuccess(boolean success) { + this.success = success; + return this; + } + + public Builder setBytesWritten(long bytesWritten) { + this.bytesWritten = bytesWritten; + return this; + } + + public Builder setCommitInfos(Collection commitInfos) { + this.commitInfos = commitInfos; + return this; + } + + public Builder setDataStreamReplyHeader(DataStreamReplyHeader header) { + return setDataStreamPacket(header) + .setSuccess(header.isSuccess()) + .setBytesWritten(header.getBytesWritten()) + .setCommitInfos(header.getCommitInfos()); + } + + public Builder setDataStreamPacket(DataStreamPacket packet) { + return setClientId(packet.getClientId()) + .setType(packet.getType()) + .setStreamId(packet.getStreamId()) + .setStreamOffset(packet.getStreamOffset()); + } + + public DataStreamReplyByteBuf build() { + return new DataStreamReplyByteBuf( + clientId, type, streamId, streamOffset, buffer, success, bytesWritten, commitInfos); + } + } + + public static Builder newBuilder() { + return new Builder(); + } + + private final AtomicReference buffer; + private final boolean success; + private final long bytesWritten; + private final Collection commitInfos; + + @SuppressWarnings("parameternumber") + private DataStreamReplyByteBuf(ClientId clientId, Type type, long streamId, long streamOffset, ByteBuf buffer, + boolean success, long bytesWritten, Collection commitInfos) { + super(clientId, type, streamId, streamOffset); + this.buffer = new AtomicReference<>(buffer != null ? buffer.asReadOnly() : Unpooled.EMPTY_BUFFER); + this.success = success; + this.bytesWritten = bytesWritten; + this.commitInfos = commitInfos != null ? commitInfos : Collections.emptyList(); + } + + private ByteBuf getBuffer() { + return Optional.ofNullable(buffer.get()).orElseThrow( + () -> new IllegalStateException("buffer is already released in " + this)); + } + + @Override + public long getDataLength() { + return getBuffer().readableBytes(); + } + + public ByteBuf slice() { + return getBuffer().slice(); + } + + @Override + public boolean isSuccess() { + return success; + } + + @Override + public long getBytesWritten() { + return bytesWritten; + } + + @Override + public Collection getCommitInfos() { + return commitInfos; + } + + @Override + public void release() { + final ByteBuf got = buffer.getAndSet(null); + if (got != null && got != Unpooled.EMPTY_BUFFER) { + got.release(); + } + } + + @Override + public String toString() { + return super.toString() + + "," + (success ? "SUCCESS" : "FAILED") + + ",bytesWritten=" + bytesWritten; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java new file mode 100644 index 0000000000..b36f045a74 --- /dev/null +++ b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ratis.datastream.impl; + +import org.apache.ratis.proto.RaftProtos.CommitInfoProto; +import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.DataStreamPacket; +import org.apache.ratis.protocol.DataStreamReply; +import org.apache.ratis.protocol.DataStreamReplyHeader; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +/** + * Implements {@link DataStreamReply} with multiple {@link ByteBuffer}s. + * + * This class is immutable. + */ +public final class DataStreamReplyByteBuffers extends DataStreamPacketImpl implements DataStreamReply { + public static final class Builder { + private ClientId clientId; + private Type type; + private long streamId; + private long streamOffset; + private Iterable buffers; + + private boolean success; + private long bytesWritten; + private Collection commitInfos; + + private Builder() { + } + + public Builder setClientId(ClientId clientId) { + this.clientId = clientId; + return this; + } + + public Builder setType(Type type) { + this.type = type; + return this; + } + + public Builder setStreamId(long streamId) { + this.streamId = streamId; + return this; + } + + public Builder setStreamOffset(long streamOffset) { + this.streamOffset = streamOffset; + return this; + } + + public Builder setBuffers(Iterable buffers) { + this.buffers = buffers; + return this; + } + + public Builder setSuccess(boolean success) { + this.success = success; + return this; + } + + public Builder setBytesWritten(long bytesWritten) { + this.bytesWritten = bytesWritten; + return this; + } + + public Builder setCommitInfos(Collection commitInfos) { + this.commitInfos = commitInfos; + return this; + } + + public Builder setDataStreamReplyHeader(DataStreamReplyHeader header) { + return setDataStreamPacket(header) + .setSuccess(header.isSuccess()) + .setBytesWritten(header.getBytesWritten()) + .setCommitInfos(header.getCommitInfos()); + } + + public Builder setDataStreamPacket(DataStreamPacket packet) { + return setClientId(packet.getClientId()) + .setType(packet.getType()) + .setStreamId(packet.getStreamId()) + .setStreamOffset(packet.getStreamOffset()); + } + + public DataStreamReplyByteBuffers build() { + return new DataStreamReplyByteBuffers(clientId, type, streamId, + streamOffset, buffers, success, bytesWritten, commitInfos); + } + } + + public static Builder newBuilder() { + return new Builder(); + } + + private final List buffers; + private final long dataLength; + private final boolean success; + private final long bytesWritten; + private final Collection commitInfos; + + @SuppressWarnings("parameternumber") + private DataStreamReplyByteBuffers(ClientId clientId, Type type, + long streamId, long streamOffset, Iterable buffers, + boolean success, long bytesWritten, + Collection commitInfos) { + super(clientId, type, streamId, streamOffset); + final List readOnlyBuffers = new ArrayList<>(); + long length = 0; + if (buffers != null) { + for (ByteBuffer buffer : buffers) { + final ByteBuffer readOnly = buffer.asReadOnlyBuffer(); + readOnlyBuffers.add(readOnly); + length += readOnly.remaining(); + } + } + this.buffers = Collections.unmodifiableList(readOnlyBuffers); + this.dataLength = length; + this.success = success; + this.bytesWritten = bytesWritten; + this.commitInfos = commitInfos != null ? commitInfos + : Collections.emptyList(); + } + + @Override + public long getDataLength() { + return dataLength; + } + + public List slices() { + final List slices = new ArrayList<>(buffers.size()); + for (ByteBuffer buffer : buffers) { + slices.add(buffer.slice()); + } + return slices; + } + + @Override + public boolean isSuccess() { + return success; + } + + @Override + public long getBytesWritten() { + return bytesWritten; + } + + @Override + public Collection getCommitInfos() { + return commitInfos; + } + + @Override + public String toString() { + return super.toString() + + "," + (success ? "SUCCESS" : "FAILED") + + ",bytesWritten=" + bytesWritten; + } +} diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java b/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java index 459aee363c..8c6c4466cf 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java @@ -30,4 +30,10 @@ public interface DataStreamReply extends DataStreamPacket { /** @return the commit information when the reply is created. */ Collection getCommitInfos(); -} \ No newline at end of file + + /** + * Release resources owned by this reply. + */ + default void release() { + } +} diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java index 583d6e3e94..4ecf3e2077 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java @@ -18,6 +18,8 @@ package org.apache.ratis.netty; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.FilePositionCount; @@ -30,6 +32,7 @@ import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.DataStreamPacketHeader; import org.apache.ratis.protocol.DataStreamReplyHeader; +import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.DataStreamRequest; import org.apache.ratis.protocol.DataStreamRequestHeader; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; @@ -80,7 +83,7 @@ static ByteBuffer getDataStreamRequestHeaderProtoByteBuffer(DataStreamRequest re .asReadOnlyByteBuffer(); } - static ByteBuffer getDataStreamReplyHeaderProtoByteBuf(DataStreamReplyByteBuffer reply) { + static ByteBuffer getDataStreamReplyHeaderProtoByteBuf(DataStreamReply reply) { DataStreamPacketHeaderProto.Builder b = DataStreamPacketHeaderProto .newBuilder() .setClientId(reply.getClientId().toByteString()) @@ -159,6 +162,18 @@ static void encodeDataStreamReplyByteBuffer(DataStreamReplyByteBuffer reply, Con out.accept(Unpooled.wrappedBuffer(reply.slice())); } + static void encodeDataStreamReplyByteBuffers(DataStreamReplyByteBuffers reply, Consumer out, + ByteBufAllocator allocator) { + ByteBuffer headerBuf = getDataStreamReplyHeaderProtoByteBuf(reply); + final ByteBuf headerLenBuf = allocator.ioBuffer(DataStreamPacketHeader.getSizeOfHeaderLen()); + headerLenBuf.writeInt(headerBuf.remaining()); + out.accept(headerLenBuf); + out.accept(Unpooled.wrappedBuffer(headerBuf)); + for (ByteBuffer buffer : reply.slices()) { + out.accept(Unpooled.wrappedBuffer(buffer)); + } + } + static DataStreamRequestByteBuf decodeDataStreamRequestByteBuf(ByteBuf buf) { return Optional.ofNullable(decodeDataStreamRequestHeader(buf)) .map(header -> checkHeader(header, buf)) @@ -224,6 +239,30 @@ static DataStreamReplyByteBuffer decodeDataStreamReplyByteBuffer(ByteBuf buf) { .orElse(null); } + static DataStreamReplyByteBuffer toDataStreamReplyByteBuffer(DataStreamReplyByteBuf reply) { + try { + return DataStreamReplyByteBuffer.newBuilder() + .setDataStreamPacket(reply) + .setBuffer(copy(reply.slice())) + .setSuccess(reply.isSuccess()) + .setBytesWritten(reply.getBytesWritten()) + .setCommitInfos(reply.getCommitInfos()) + .build(); + } finally { + reply.release(); + } + } + + static DataStreamReplyByteBuf decodeDataStreamReplyByteBuf(ByteBuf buf) { + return Optional.ofNullable(decodeDataStreamReplyHeader(buf)) + .map(header -> checkHeader(header, buf)) + .map(header -> DataStreamReplyByteBuf.newBuilder() + .setDataStreamReplyHeader(header) + .setBuffer(decodeData(buf, header, ByteBuf::retainedSlice)) + .build()) + .orElse(null); + } + static DataStreamReplyHeader decodeDataStreamReplyHeader(ByteBuf buf) { if (DataStreamPacketHeader.getSizeOfHeaderLen() > buf.readableBytes()) { return null; diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java index 2bfeea31e1..3716b2a760 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java @@ -23,6 +23,7 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -55,6 +56,7 @@ import org.apache.ratis.thirdparty.io.netty.handler.codec.ByteToMessageDecoder; import org.apache.ratis.thirdparty.io.netty.handler.codec.MessageToMessageEncoder; import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; +import org.apache.ratis.thirdparty.io.netty.util.concurrent.ScheduledFuture; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedFunction; import org.apache.ratis.util.MemoizedSupplier; @@ -73,10 +75,13 @@ import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; @@ -320,10 +325,66 @@ synchronized boolean shouldFlush(int countMin, SizeInBytes bytesMin, DataStreamR } } + static class ReadOnlyStreamingReply { + private final NettyClientReplies.RequestEntry terminalEntry; + private final CompletableFuture replyFuture; + private final Consumer replyConsumer; + private Supplier> timeoutScheduler; + private ScheduledFuture timeoutFuture; + + ReadOnlyStreamingReply(DataStreamRequest request, CompletableFuture replyFuture, + Consumer replyConsumer) { + this.terminalEntry = new NettyClientReplies.RequestEntry(request); + this.replyFuture = replyFuture; + this.replyConsumer = replyConsumer; + } + + synchronized boolean receiveReply(DataStreamReply reply) { + NettyClientReplies.ReplyEntry.cancel(timeoutFuture); + final boolean terminal = !reply.isSuccess() || terminalEntry.equals(new NettyClientReplies.RequestEntry(reply)); + final DataStreamReply replyToComplete = terminal && reply instanceof DataStreamReplyByteBuf ? + NettyDataStreamUtils.toDataStreamReplyByteBuffer((DataStreamReplyByteBuf) reply) : reply; + try { + replyConsumer.accept(replyToComplete); + } catch (Throwable t) { + if (replyToComplete == reply) { + reply.release(); + } + completeExceptionally(t); + return true; + } + + if (terminal) { + replyFuture.complete(replyToComplete); + return true; + } + scheduleTimeout(); + return false; + } + + synchronized void completeExceptionally(Throwable t) { + NettyClientReplies.ReplyEntry.cancel(timeoutFuture); + replyFuture.completeExceptionally(t); + } + + synchronized void scheduleTimeout(Supplier> scheduleMethod) { + timeoutScheduler = scheduleMethod; + scheduleTimeout(); + } + + private void scheduleTimeout() { + if (!replyFuture.isDone() && timeoutScheduler != null) { + timeoutFuture = timeoutScheduler.get(); + } + } + } + private final String name; private final Connection connection; private final NettyClientReplies replies = new NettyClientReplies(); + private final ConcurrentMap readOnlyStreamingReplies + = new ConcurrentHashMap<>(); private final TimeDuration requestTimeout; private final TimeDuration closeTimeout; @@ -361,17 +422,35 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { LOG.debug("{}: read {}", name, reply); final ClientInvocationId clientInvocationId = ClientInvocationId.valueOf( reply.getClientId(), reply.getStreamId()); + final ReadOnlyStreamingReply readOnlyStreamingReply = readOnlyStreamingReplies.get(clientInvocationId); + if (readOnlyStreamingReply != null) { + try { + if (readOnlyStreamingReply.receiveReply(reply)) { + readOnlyStreamingReplies.remove(clientInvocationId, readOnlyStreamingReply); + } + } catch (Throwable cause) { + LOG.warn("{} : channelRead error:", name, cause); + readOnlyStreamingReplies.remove(clientInvocationId, readOnlyStreamingReply); + readOnlyStreamingReply.completeExceptionally(cause); + } + return; + } + final NettyClientReplies.ReplyMap replyMap = replies.getReplyMap(clientInvocationId); if (replyMap == null) { LOG.error("{}: {} replyMap not found for reply: {}", name, clientInvocationId, reply); + reply.release(); return; } try { - replyMap.receiveReply(reply); + final DataStreamReply replyToReceive = reply instanceof DataStreamReplyByteBuf ? + NettyDataStreamUtils.toDataStreamReplyByteBuffer((DataStreamReplyByteBuf) reply) : reply; + replyMap.receiveReply(replyToReceive); } catch (Throwable cause) { LOG.warn("{} : channelRead error:", name, cause); replyMap.completeExceptionally(cause); + reply.release(); } } @@ -456,7 +535,7 @@ static ByteToMessageDecoder newDecoder() { @Override protected void decode(ChannelHandlerContext context, ByteBuf buf, List out) { - Optional.ofNullable(NettyDataStreamUtils.decodeDataStreamReplyByteBuffer(buf)).ifPresent(out::add); + Optional.ofNullable(NettyDataStreamUtils.decodeDataStreamReplyByteBuf(buf)).ifPresent(out::add); } }; } @@ -507,6 +586,55 @@ public CompletableFuture streamAsync(DataStreamRequest request) return f; } + @Override + public CompletableFuture streamAsync( + DataStreamRequest request, Consumer replyConsumer) { + final CompletableFuture f = new CompletableFuture<>(); + final ClientInvocationId clientInvocationId = ClientInvocationId.valueOf(request.getClientId(), + request.getStreamId()); + final ReadOnlyStreamingReply replyEntry = new ReadOnlyStreamingReply(request, f, replyConsumer); + if (readOnlyStreamingReplies.putIfAbsent(clientInvocationId, replyEntry) != null) { + f.completeExceptionally(new AlreadyClosedException(this + ": A read-only stream already exists for " + + clientInvocationId)); + return f; + } + + final ChannelFuture channelFuture; + final Channel channel; + LOG.debug("{}: write read-only stream begin {}", this, request); + synchronized (replyEntry) { + channel = connection.getChannelUninterruptibly(); + if (channel == null) { + readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); + f.completeExceptionally(new AlreadyClosedException(this + ": Failed to send " + request)); + return f; + } + final Function writeMethod = outstandingRequests.shouldFlush( + flushRequestCountMin, flushRequestBytesMin, request)? channel::writeAndFlush: channel::write; + channelFuture = writeMethod.apply(request); + } + channelFuture.addListener(future -> { + if (!future.isSuccess()) { + readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); + final IOException e = new IOException(this + ": Failed to send " + request + " to " + channel.remoteAddress(), + future.cause()); + replyEntry.completeExceptionally(e); + LOG.error("Channel write failed", e); + } else { + LOG.debug("{}: write read-only stream after {}", this, request); + + replyEntry.scheduleTimeout(() -> channel.eventLoop().schedule(() -> { + if (!f.isDone()) { + readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); + replyEntry.completeExceptionally(new TimeoutIOException( + "Timeout " + requestTimeout + ": Failed to send " + request + " via channel " + channel)); + } + }, requestTimeout.getDuration(), requestTimeout.getUnit())); + } + }); + return f; + } + @Override public void close() { final boolean flush = outstandingRequests.shouldFlush(0, SizeInBytes.ZERO, null); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java index f3b98054d0..ab5b48ca94 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java @@ -23,6 +23,7 @@ import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -33,6 +34,7 @@ import org.apache.ratis.proto.RaftProtos.CommitInfoProto; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto; +import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto.TypeCase; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.DataStreamReply; @@ -69,6 +71,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -352,9 +355,56 @@ static DataStreamReplyByteBuffer newDataStreamReplyByteBuffer(DataStreamRequestB .setDataStreamPacket(request) .setBuffer(buffer) .setSuccess(reply.isSuccess()) + .setCommitInfos(reply.getCommitInfos()) .build(); } + static DataStreamReplyByteBuffer newDataStreamReadOnlyReplyByteBuffer(DataStreamRequestByteBuf request, + long streamOffset, ByteBuffer buffer) { + final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer(); + return DataStreamReplyByteBuffer.newBuilder() + .setClientId(request.getClientId()) + .setType(Type.STREAM_DATA) + .setStreamId(request.getStreamId()) + .setStreamOffset(streamOffset) + .setBuffer(readOnlyBuffer) + .setSuccess(true) + .setBytesWritten(readOnlyBuffer.remaining()) + .build(); + } + + static DataStreamReplyByteBuffers newDataStreamReadOnlyReplyByteBuffers(DataStreamRequestByteBuf request, + long streamOffset, Iterable buffers) { + final List list = new ArrayList<>(); + long dataLength = 0; + for (ByteBuffer buffer : buffers) { + final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer(); + list.add(readOnlyBuffer); + dataLength += readOnlyBuffer.remaining(); + } + return DataStreamReplyByteBuffers.newBuilder() + .setClientId(request.getClientId()) + .setType(Type.STREAM_DATA) + .setStreamId(request.getStreamId()) + .setStreamOffset(streamOffset) + .setBuffers(list) + .setSuccess(true) + .setBytesWritten(dataLength) + .build(); + } + + private static CompletableFuture writeAndFlush(ChannelHandlerContext ctx, DataStreamReply reply) { + final CompletableFuture future = new CompletableFuture<>(); + ctx.writeAndFlush(reply).addListener(channelFuture -> { + if (channelFuture.isSuccess()) { + future.complete(null); + } else { + future.completeExceptionally(channelFuture.cause()); + } + }); + return future; + } + private void sendReply(List> remoteWrites, DataStreamRequestByteBuf request, long bytesWritten, Collection commitInfos, ChannelHandlerContext ctx) { @@ -450,6 +500,23 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct // add to ChannelMap channels.add(channelId, key); + if (request.getType() == Type.STREAM_HEADER) { + final RaftClientRequest raftClientRequest = toRaftClientRequest(request); + if (raftClientRequest.is(TypeCase.READ)) { + submitReadOnlyRequest(request, raftClientRequest, ctx).whenComplete((v, exception) -> { + try { + if (exception != null) { + replyDataStreamException(server, exception, raftClientRequest, request, ctx); + } + } finally { + request.release(); + channels.remove(channelId, key); + } + }); + return; + } + } + final StreamInfo info; if (request.getType() == Type.STREAM_HEADER) { final MemoizedSupplier supplier = JavaUtils.memoize( @@ -510,6 +577,42 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct }); } + private static RaftClientRequest toRaftClientRequest(DataStreamRequestByteBuf request) { + try { + return ClientProtoUtils.toRaftClientRequest(RaftClientRequestProto.parseFrom(request.slice().nioBuffer())); + } catch (Throwable e) { + throw new CompletionException(e); + } + } + + private CompletableFuture submitReadOnlyRequest(DataStreamRequestByteBuf request, + RaftClientRequest raftClientRequest, ChannelHandlerContext ctx) { + try { + final StateMachine.ReadOnlyDataStream readOnlyDataStream = new StateMachine.ReadOnlyDataStream() { + private long streamOffset; + + @Override + public synchronized CompletableFuture writeAsync(ByteBuffer buffer) { + final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); + streamOffset += reply.getDataLength(); + return writeAndFlush(ctx, reply); + } + + @Override + public synchronized CompletableFuture writeAsync(Iterable buffers) { + final DataStreamReplyByteBuffers reply = newDataStreamReadOnlyReplyByteBuffers( + request, streamOffset, buffers); + streamOffset += reply.getDataLength(); + return writeAndFlush(ctx, reply); + } + }; + return server.streamReadOnlyAsync(raftClientRequest, readOnlyDataStream) + .thenCompose(reply -> writeAndFlush(ctx, newDataStreamReplyByteBuffer(request, reply))); + } catch (IOException e) { + return JavaUtils.completeExceptionally(e); + } + } + static void assertReplyCorrespondingToRequest( final DataStreamRequestByteBuf request, final DataStreamReply reply) { Preconditions.assertTrue(request.getClientId().equals(reply.getClientId())); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java index 451040bb62..6ec4c0cb04 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java @@ -24,6 +24,7 @@ import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.netty.NettyDataStreamUtils; @@ -31,6 +32,7 @@ import org.apache.ratis.netty.metrics.NettyServerStreamRpcMetrics; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.DataStreamPacket; +import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.security.TlsConf; @@ -280,13 +282,22 @@ protected void decode(ChannelHandlerContext context, ByteBuf buf, List o }; } - static final MessageToMessageEncoder ENCODER = new Encoder(); + static final MessageToMessageEncoder ENCODER = new Encoder(); @ChannelHandler.Sharable - static class Encoder extends MessageToMessageEncoder { + static class Encoder extends MessageToMessageEncoder { @Override - protected void encode(ChannelHandlerContext context, DataStreamReplyByteBuffer reply, List out) { - NettyDataStreamUtils.encodeDataStreamReplyByteBuffer(reply, out::add, context.alloc()); + protected void encode(ChannelHandlerContext context, DataStreamReply reply, List out) { + if (reply instanceof DataStreamReplyByteBuffer) { + NettyDataStreamUtils.encodeDataStreamReplyByteBuffer( + (DataStreamReplyByteBuffer) reply, out::add, context.alloc()); + } else if (reply instanceof DataStreamReplyByteBuffers) { + NettyDataStreamUtils.encodeDataStreamReplyByteBuffers( + (DataStreamReplyByteBuffers) reply, out::add, context.alloc()); + } else { + throw new IllegalStateException("Unexpected reply class " + + reply.getClass()); + } } } diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java index 84e3a1ed30..5d0a584cfa 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Objects; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.proto.RaftProtos.CommitInfoProto; @@ -32,6 +33,8 @@ import org.apache.ratis.protocol.AdminProtocol; import org.apache.ratis.protocol.RaftClientAsynchronousProtocol; import org.apache.ratis.protocol.RaftClientProtocol; +import org.apache.ratis.protocol.RaftClientReply; +import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftGroupMemberId; @@ -150,6 +153,18 @@ default RaftGroup getGroup() { /** @return the data stream rpc service. */ DataStreamServerRpc getDataStreamServerRpc(); + /** + * Submit a read-only request whose response may be streamed through the data stream RPC. + * + * @param request the read-only request + * @param stream the stream for response data chunks + * @return a future for the terminal reply + */ + default CompletableFuture streamReadOnlyAsync( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { + throw new UnsupportedOperationException("This method is NOT supported."); + } + /** @return the {@link RpcType}. */ default RpcType getRpcType() { return getFactory().getRpcType(); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java index 98d4537847..601e80b54b 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java @@ -40,7 +40,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.function.Function; @@ -116,6 +118,18 @@ default CompletableFuture stream(RaftClientRequest request) { return CompletableFuture.completedFuture(null); } + /** + * Stream a read-only state machine request. Implementations may write zero or more data + * chunks before completing the returned future with the terminal reply message. + * + * @param request the read-only client request + * @param stream the output stream for response data chunks + * @return a future for the terminal reply message + */ + default CompletableFuture streamReadOnly(RaftClientRequest request, ReadOnlyDataStream stream) { + throw new UnsupportedOperationException("This method is NOT supported."); + } + /** * Link asynchronously the given stream with the given log entry. * The given stream can be null if it is unavailable due to errors. @@ -152,6 +166,39 @@ default CompletableFuture truncate(long logIndex) { } } + /** A stream for read-only state machine response data. */ + interface ReadOnlyDataStream { + /** + * Write the next response chunk. + * + * @return a future completed when the chunk is sent. + */ + CompletableFuture writeAsync(ByteBuffer buffer); + + /** + * Write the next response chunk using multiple buffers. + * + * The default implementation combines the buffers into one chunk. + * + * @return a future completed when the chunk is sent. + */ + default CompletableFuture writeAsync(Iterable buffers) { + final List list = new ArrayList<>(); + long length = 0; + for (ByteBuffer buffer : buffers) { + final ByteBuffer duplicate = buffer.duplicate(); + list.add(duplicate); + length += duplicate.remaining(); + } + final ByteBuffer combined = ByteBuffer.allocate(Math.toIntExact(length)); + for (ByteBuffer buffer : list) { + combined.put(buffer); + } + combined.flip(); + return writeAsync(combined); + } + } + /** * An optional API for event notifications. */ diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index f758fd0edc..8313c4c4f6 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -971,6 +971,13 @@ CompletableFuture executeSubmitClientRequestAsync(RaftClientReq clientExecutor).join(); } + CompletableFuture executeStreamReadOnlyAsync( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { + return CompletableFuture.supplyAsync( + () -> JavaUtils.callAsUnchecked(() -> streamReadOnlyAsync(request, stream), CompletionException::new), + clientExecutor).thenCompose(Function.identity()); + } + @Override public CompletableFuture submitClientRequestAsync( RaftClientRequest request) throws IOException { @@ -979,6 +986,31 @@ public CompletableFuture submitClientRequestAsync( request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC); } + public CompletableFuture streamReadOnlyAsync( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { + return TraceServer.traceAsyncMethod( + () -> streamReadOnlyAsyncInternal(request, stream), + request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC); + } + + private CompletableFuture streamReadOnlyAsyncInternal( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { + assertLifeCycleState(LifeCycle.States.RUNNING); + if (!request.is(TypeCase.READ)) { + throw new IOException("Expected a read-only request but got " + request); + } + + LOG.debug("{}: receive read-only stream request({})", getMemberId(), request); + final Timekeeper timer = raftServerMetrics.getClientRequestTimer(request.getType()); + final Optional timerContext = Optional.ofNullable(timer).map(Timekeeper::time); + return readAsync(request, r -> streamReadOnlyStateMachine(r, stream)).whenComplete((clientReply, exception) -> { + timerContext.ifPresent(Timekeeper.Context::stop); + if (exception != null || clientReply.getException() != null) { + raftServerMetrics.incFailedRequestCount(request.getType()); + } + }); + } + private CompletableFuture submitClientRequestAsyncInternal( RaftClientRequest request) throws IOException { assertLifeCycleState(LifeCycle.States.RUNNING); @@ -1110,13 +1142,18 @@ private CompletableFuture getReadIndex(RaftClientRequest request, LeaderSt return writeIndexCache.getWriteIndexFuture(request).thenCompose(leader::getReadIndex); } private CompletableFuture readAsync(RaftClientRequest request) { + return readAsync(request, this::queryStateMachine); + } + + private CompletableFuture readAsync( + RaftClientRequest request, Function> query) { if (request.getType().getRead().getPreferNonLinearizable() || readOption == RaftServerConfigKeys.Read.Option.DEFAULT) { final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } - return queryStateMachine(request); + return query.apply(request); } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE){ final LeaderStateImpl leader = role.getLeaderState().orElse(null); final CompletableFuture replyFuture; @@ -1136,7 +1173,7 @@ private CompletableFuture readAsync(RaftClientRequest request) return replyFuture .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) - .thenCompose(readIndex -> queryStateMachine(request)) + .thenCompose(readIndex -> query.apply(request)) .exceptionally(e -> readException2Reply(request, e)); } else { throw new IllegalStateException("Unexpected read option: " + readOption); @@ -1186,6 +1223,15 @@ CompletableFuture queryStateMachine(RaftClientRequest request) return processQueryFuture(stateMachine.query(request.getMessage()), request); } + CompletableFuture streamReadOnlyStateMachine( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { + try { + return processQueryFuture(stateMachine.data().streamReadOnly(request, stream), request); + } catch (UnsupportedOperationException e) { + return queryStateMachine(request); + } + } + CompletableFuture processQueryFuture( CompletableFuture queryFuture, RaftClientRequest request) { return queryFuture.thenApply(r -> newReplyBuilder(request).setSuccess().setMessage(r).build()) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java index 8539fa99ec..8acb0c4e0c 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java @@ -459,6 +459,13 @@ public CompletableFuture submitClientRequestAsync(RaftClientReq .thenCompose(impl -> impl.executeSubmitClientRequestAsync(request)); } + @Override + public CompletableFuture streamReadOnlyAsync( + RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { + return getImplFuture(request.getRaftGroupId()) + .thenCompose(impl -> impl.executeStreamReadOnlyAsync(request, stream)); + } + @Override public RaftClientReply submitClientRequest(RaftClientRequest request) throws IOException { diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java index dabc93dda2..5dd4c810ed 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java @@ -18,6 +18,7 @@ package org.apache.ratis.datastream; import org.apache.ratis.BaseTest; +import org.apache.ratis.client.api.DataStreamInput; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RoutingTable; @@ -26,12 +27,15 @@ import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.datastream.DataStreamTestUtils.MultiDataStreamStateMachine; import org.apache.ratis.datastream.DataStreamTestUtils.SingleDataStream; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.server.RaftServer; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.Timestamp; @@ -40,6 +44,7 @@ import org.junit.jupiter.api.Test; import java.io.File; +import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; import java.util.Collection; @@ -71,6 +76,11 @@ public void testStreamWithInvalidRoutingTable() throws Exception { runWithNewCluster(NUM_SERVERS, this::runTestInvalidPrimaryInRoutingTable); } + @Test + public void testStreamReadOnly() throws Exception { + runWithNewCluster(NUM_SERVERS, this::runTestStreamReadOnly); + } + void testStreamWrites(CLUSTER cluster) throws Exception { waitForLeader(cluster); runTestDataStreamOutput(cluster); @@ -105,6 +115,41 @@ void runTestDataStreamOutput(CLUSTER cluster) throws Exception { assertLogEntry(cluster, request); } + void runTestStreamReadOnly(CLUSTER cluster) throws Exception { + final RaftPeer primaryServer = waitForLeader(cluster).getPeer(); + final ByteString query = ByteString.copyFromUtf8("stream-read-only"); + try (RaftClient client = cluster.createClient(primaryServer); + DataStreamInput in = client.getDataStreamApi().streamReadOnly(query.asReadOnlyByteBuffer())) { + for (int i = 0; i < MultiDataStreamStateMachine.READ_ONLY_STREAM_CHUNKS; i++) { + final ByteString chunk = MultiDataStreamStateMachine.getReadOnlyStreamChunk(query, i); + final DataStreamReply data = in.readAsync().join(); + DataStreamTestUtils.assertSuccessReply(Type.STREAM_DATA, chunk.size(), data); + Assertions.assertEquals(chunk, toByteString(data)); + } + + final DataStreamReply reply = in.readAsync().join(); + DataStreamTestUtils.assertSuccessReply(Type.STREAM_HEADER, 0, reply); + + final RaftClientReply clientReply = in.getRaftClientReplyFuture().join(); + Assertions.assertTrue(clientReply.isSuccess()); + Assertions.assertEquals(query, clientReply.getMessage().getContent()); + } + } + + private static ByteString toByteString(DataStreamReply reply) { + try { + if (reply instanceof DataStreamReplyByteBuffer) { + final ByteBuffer buffer = ((DataStreamReplyByteBuffer) reply).slice(); + return ByteString.copyFrom(buffer); + } else if (reply instanceof DataStreamReplyByteBuf) { + return ByteString.copyFrom(((DataStreamReplyByteBuf) reply).slice().nioBuffer()); + } + throw new AssertionError("Unexpected reply " + reply); + } finally { + reply.release(); + } + } + void runTestInvalidPrimaryInRoutingTable(CLUSTER cluster) throws Exception { final RaftPeer primaryServer = CollectionUtils.random(cluster.getGroup().getPeers()); diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java index 989b6cd2b2..ead4661811 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java @@ -42,6 +42,7 @@ import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.statemachine.StateMachine.DataChannel; +import org.apache.ratis.statemachine.StateMachine.ReadOnlyDataStream; import org.apache.ratis.statemachine.StateMachine.DataStream; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; @@ -147,8 +148,14 @@ static RoutingTable getRoutingTableChainTopology(Iterable peers, Raf } class MultiDataStreamStateMachine extends BaseStateMachine { + static final int READ_ONLY_STREAM_CHUNKS = 3; + private final ConcurrentMap streams = new ConcurrentHashMap<>(); + static ByteString getReadOnlyStreamChunk(ByteString query, int index) { + return ByteString.copyFromUtf8(query.toStringUtf8() + "-chunk-" + index); + } + @Override public CompletableFuture stream(RaftClientRequest request) { final SingleDataStream s = new SingleDataStream(request); @@ -176,6 +183,21 @@ public CompletableFuture applyTransaction(TransactionContext trx) { return CompletableFuture.completedFuture(() -> bytesWritten); } + @Override + public CompletableFuture query(Message request) { + return CompletableFuture.completedFuture(request); + } + + @Override + public CompletableFuture streamReadOnly(RaftClientRequest request, ReadOnlyDataStream stream) { + CompletableFuture writes = CompletableFuture.completedFuture(null); + for (int i = 0; i < READ_ONLY_STREAM_CHUNKS; i++) { + final ByteString chunk = getReadOnlyStreamChunk(request.getMessage().getContent(), i); + writes = writes.thenCompose(ignored -> stream.writeAsync(chunk.asReadOnlyByteBuffer())); + } + return writes.thenApply(ignored -> request.getMessage()); + } + SingleDataStream getSingleDataStream(RaftClientRequest request) { return getSingleDataStream(ClientInvocationId.valueOf(request)); } From a106ecf8a7d76c0e3e63691866334521a33118c7 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 26 May 2026 14:14:50 +0800 Subject: [PATCH 2/9] use data channel Co-authored-by: Tsz-Wo Nicholas Sze Signed-off-by: peterxcli --- .../impl/DataStreamReplyByteBuffers.java | 181 ------------------ .../ratis/netty/NettyDataStreamUtils.java | 13 -- .../netty/server/DataStreamManagement.java | 75 ++++---- .../netty/server/NettyServerStreamRpc.java | 4 - .../org/apache/ratis/server/RaftServer.java | 2 +- .../ratis/statemachine/StateMachine.java | 37 +--- .../ratis/server/impl/RaftServerImpl.java | 130 +++++-------- .../ratis/server/impl/RaftServerProxy.java | 2 +- 8 files changed, 92 insertions(+), 352 deletions(-) delete mode 100644 ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java diff --git a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java deleted file mode 100644 index b36f045a74..0000000000 --- a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuffers.java +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.ratis.datastream.impl; - -import org.apache.ratis.proto.RaftProtos.CommitInfoProto; -import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; -import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.DataStreamPacket; -import org.apache.ratis.protocol.DataStreamReply; -import org.apache.ratis.protocol.DataStreamReplyHeader; - -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -/** - * Implements {@link DataStreamReply} with multiple {@link ByteBuffer}s. - * - * This class is immutable. - */ -public final class DataStreamReplyByteBuffers extends DataStreamPacketImpl implements DataStreamReply { - public static final class Builder { - private ClientId clientId; - private Type type; - private long streamId; - private long streamOffset; - private Iterable buffers; - - private boolean success; - private long bytesWritten; - private Collection commitInfos; - - private Builder() { - } - - public Builder setClientId(ClientId clientId) { - this.clientId = clientId; - return this; - } - - public Builder setType(Type type) { - this.type = type; - return this; - } - - public Builder setStreamId(long streamId) { - this.streamId = streamId; - return this; - } - - public Builder setStreamOffset(long streamOffset) { - this.streamOffset = streamOffset; - return this; - } - - public Builder setBuffers(Iterable buffers) { - this.buffers = buffers; - return this; - } - - public Builder setSuccess(boolean success) { - this.success = success; - return this; - } - - public Builder setBytesWritten(long bytesWritten) { - this.bytesWritten = bytesWritten; - return this; - } - - public Builder setCommitInfos(Collection commitInfos) { - this.commitInfos = commitInfos; - return this; - } - - public Builder setDataStreamReplyHeader(DataStreamReplyHeader header) { - return setDataStreamPacket(header) - .setSuccess(header.isSuccess()) - .setBytesWritten(header.getBytesWritten()) - .setCommitInfos(header.getCommitInfos()); - } - - public Builder setDataStreamPacket(DataStreamPacket packet) { - return setClientId(packet.getClientId()) - .setType(packet.getType()) - .setStreamId(packet.getStreamId()) - .setStreamOffset(packet.getStreamOffset()); - } - - public DataStreamReplyByteBuffers build() { - return new DataStreamReplyByteBuffers(clientId, type, streamId, - streamOffset, buffers, success, bytesWritten, commitInfos); - } - } - - public static Builder newBuilder() { - return new Builder(); - } - - private final List buffers; - private final long dataLength; - private final boolean success; - private final long bytesWritten; - private final Collection commitInfos; - - @SuppressWarnings("parameternumber") - private DataStreamReplyByteBuffers(ClientId clientId, Type type, - long streamId, long streamOffset, Iterable buffers, - boolean success, long bytesWritten, - Collection commitInfos) { - super(clientId, type, streamId, streamOffset); - final List readOnlyBuffers = new ArrayList<>(); - long length = 0; - if (buffers != null) { - for (ByteBuffer buffer : buffers) { - final ByteBuffer readOnly = buffer.asReadOnlyBuffer(); - readOnlyBuffers.add(readOnly); - length += readOnly.remaining(); - } - } - this.buffers = Collections.unmodifiableList(readOnlyBuffers); - this.dataLength = length; - this.success = success; - this.bytesWritten = bytesWritten; - this.commitInfos = commitInfos != null ? commitInfos - : Collections.emptyList(); - } - - @Override - public long getDataLength() { - return dataLength; - } - - public List slices() { - final List slices = new ArrayList<>(buffers.size()); - for (ByteBuffer buffer : buffers) { - slices.add(buffer.slice()); - } - return slices; - } - - @Override - public boolean isSuccess() { - return success; - } - - @Override - public long getBytesWritten() { - return bytesWritten; - } - - @Override - public Collection getCommitInfos() { - return commitInfos; - } - - @Override - public String toString() { - return super.toString() - + "," + (success ? "SUCCESS" : "FAILED") - + ",bytesWritten=" + bytesWritten; - } -} diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java index 4ecf3e2077..d160bd1354 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java @@ -19,7 +19,6 @@ import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.FilePositionCount; @@ -162,18 +161,6 @@ static void encodeDataStreamReplyByteBuffer(DataStreamReplyByteBuffer reply, Con out.accept(Unpooled.wrappedBuffer(reply.slice())); } - static void encodeDataStreamReplyByteBuffers(DataStreamReplyByteBuffers reply, Consumer out, - ByteBufAllocator allocator) { - ByteBuffer headerBuf = getDataStreamReplyHeaderProtoByteBuf(reply); - final ByteBuf headerLenBuf = allocator.ioBuffer(DataStreamPacketHeader.getSizeOfHeaderLen()); - headerLenBuf.writeInt(headerBuf.remaining()); - out.accept(headerLenBuf); - out.accept(Unpooled.wrappedBuffer(headerBuf)); - for (ByteBuffer buffer : reply.slices()) { - out.accept(Unpooled.wrappedBuffer(buffer)); - } - } - static DataStreamRequestByteBuf decodeDataStreamRequestByteBuf(ByteBuf buf) { return Optional.ofNullable(decodeDataStreamRequestHeader(buf)) .map(header -> checkHeader(header, buf)) diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java index ab5b48ca94..b0b93b7754 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java @@ -23,7 +23,6 @@ import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -44,6 +43,7 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.protocol.RoutingTable; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.protocol.exceptions.AlreadyExistsException; import org.apache.ratis.protocol.exceptions.DataStreamException; import org.apache.ratis.server.RaftConfiguration; @@ -55,6 +55,7 @@ import org.apache.ratis.statemachine.StateMachine.DataChannel; import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; import org.apache.ratis.util.ConcurrentUtils; @@ -70,8 +71,8 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.io.InterruptedIOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -373,26 +374,6 @@ static DataStreamReplyByteBuffer newDataStreamReadOnlyReplyByteBuffer(DataStream .build(); } - static DataStreamReplyByteBuffers newDataStreamReadOnlyReplyByteBuffers(DataStreamRequestByteBuf request, - long streamOffset, Iterable buffers) { - final List list = new ArrayList<>(); - long dataLength = 0; - for (ByteBuffer buffer : buffers) { - final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer(); - list.add(readOnlyBuffer); - dataLength += readOnlyBuffer.remaining(); - } - return DataStreamReplyByteBuffers.newBuilder() - .setClientId(request.getClientId()) - .setType(Type.STREAM_DATA) - .setStreamId(request.getStreamId()) - .setStreamOffset(streamOffset) - .setBuffers(list) - .setSuccess(true) - .setBytesWritten(dataLength) - .build(); - } - private static CompletableFuture writeAndFlush(ChannelHandlerContext ctx, DataStreamReply reply) { final CompletableFuture future = new CompletableFuture<>(); ctx.writeAndFlush(reply).addListener(channelFuture -> { @@ -588,22 +569,52 @@ private static RaftClientRequest toRaftClientRequest(DataStreamRequestByteBuf re private CompletableFuture submitReadOnlyRequest(DataStreamRequestByteBuf request, RaftClientRequest raftClientRequest, ChannelHandlerContext ctx) { try { - final StateMachine.ReadOnlyDataStream readOnlyDataStream = new StateMachine.ReadOnlyDataStream() { + final StateMachine.DataChannel readOnlyDataStream = new StateMachine.DataChannel() { private long streamOffset; + private boolean closed; @Override - public synchronized CompletableFuture writeAsync(ByteBuffer buffer) { - final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); - streamOffset += reply.getDataLength(); - return writeAndFlush(ctx, reply); + public synchronized boolean isOpen() { + return !closed; } @Override - public synchronized CompletableFuture writeAsync(Iterable buffers) { - final DataStreamReplyByteBuffers reply = newDataStreamReadOnlyReplyByteBuffers( - request, streamOffset, buffers); - streamOffset += reply.getDataLength(); - return writeAndFlush(ctx, reply); + public synchronized void close() { + closed = true; + } + + @Override + public synchronized void force(boolean metadata) throws IOException { + if (!isOpen()) { + throw new AlreadyClosedException("Channel closed at offset " + streamOffset); + } + ctx.flush(); + } + + @Override + public synchronized int write(ByteBuffer buffer) throws IOException { + if (!isOpen()) { + throw new AlreadyClosedException("Channel closed at offset " + streamOffset); + } + final int length = buffer.remaining(); + final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); + final ChannelFuture future = ctx.writeAndFlush(reply); + try { + future.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new InterruptedIOException( + "Interrupted while writing " + length + " bytes at offset " + streamOffset); + } + if (!future.isSuccess()) { + final Throwable cause = future.cause(); + if (cause instanceof IOException) { + throw (IOException) cause; + } + throw new IOException("Failed to write " + length + " bytes at offset " + streamOffset, cause); + } + streamOffset += length; + return length; } }; return server.streamReadOnlyAsync(raftClientRequest, readOnlyDataStream) diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java index 6ec4c0cb04..f222fbb84b 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java @@ -24,7 +24,6 @@ import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffers; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.netty.NettyConfigKeys; import org.apache.ratis.netty.NettyDataStreamUtils; @@ -291,9 +290,6 @@ protected void encode(ChannelHandlerContext context, DataStreamReply reply, List if (reply instanceof DataStreamReplyByteBuffer) { NettyDataStreamUtils.encodeDataStreamReplyByteBuffer( (DataStreamReplyByteBuffer) reply, out::add, context.alloc()); - } else if (reply instanceof DataStreamReplyByteBuffers) { - NettyDataStreamUtils.encodeDataStreamReplyByteBuffers( - (DataStreamReplyByteBuffers) reply, out::add, context.alloc()); } else { throw new IllegalStateException("Unexpected reply class " + reply.getClass()); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java index 5d0a584cfa..e6287cc6d4 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java @@ -161,7 +161,7 @@ default RaftGroup getGroup() { * @return a future for the terminal reply */ default CompletableFuture streamReadOnlyAsync( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { + RaftClientRequest request, StateMachine.DataChannel stream) throws IOException { throw new UnsupportedOperationException("This method is NOT supported."); } diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java index 601e80b54b..114092062b 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java @@ -40,9 +40,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; import java.util.Collection; -import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.function.Function; @@ -126,7 +124,7 @@ default CompletableFuture stream(RaftClientRequest request) { * @param stream the output stream for response data chunks * @return a future for the terminal reply message */ - default CompletableFuture streamReadOnly(RaftClientRequest request, ReadOnlyDataStream stream) { + default CompletableFuture streamReadOnly(RaftClientRequest request, DataChannel stream) { throw new UnsupportedOperationException("This method is NOT supported."); } @@ -166,39 +164,6 @@ default CompletableFuture truncate(long logIndex) { } } - /** A stream for read-only state machine response data. */ - interface ReadOnlyDataStream { - /** - * Write the next response chunk. - * - * @return a future completed when the chunk is sent. - */ - CompletableFuture writeAsync(ByteBuffer buffer); - - /** - * Write the next response chunk using multiple buffers. - * - * The default implementation combines the buffers into one chunk. - * - * @return a future completed when the chunk is sent. - */ - default CompletableFuture writeAsync(Iterable buffers) { - final List list = new ArrayList<>(); - long length = 0; - for (ByteBuffer buffer : buffers) { - final ByteBuffer duplicate = buffer.duplicate(); - list.add(duplicate); - length += duplicate.remaining(); - } - final ByteBuffer combined = ByteBuffer.allocate(Math.toIntExact(length)); - for (ByteBuffer buffer : list) { - combined.put(buffer); - } - combined.flip(); - return writeAsync(combined); - } - } - /** * An optional API for event notifications. */ diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index 8313c4c4f6..7b6f34d7cb 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -964,20 +964,36 @@ CompletableFuture executeSubmitServerRequestAsync( () -> JavaUtils.callAsUnchecked(submitFunction, CompletionException::new), serverExecutor).join(); } - CompletableFuture executeSubmitClientRequestAsync(RaftClientRequest request) { return CompletableFuture.supplyAsync( () -> JavaUtils.callAsUnchecked(() -> submitClientRequestAsync(request), CompletionException::new), clientExecutor).join(); } - CompletableFuture executeStreamReadOnlyAsync( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { - return CompletableFuture.supplyAsync( - () -> JavaUtils.callAsUnchecked(() -> streamReadOnlyAsync(request, stream), CompletionException::new), + RaftClientRequest request, StateMachine.DataChannel stream) { + return CompletableFuture.supplyAsync(() -> JavaUtils.callAsUnchecked(() -> TraceServer.traceAsyncMethod(() -> { + assertLifeCycleState(LifeCycle.States.RUNNING); + if (!request.is(TypeCase.READ)) { + throw new IOException("Expected a read-only request but got " + request); + } + LOG.debug("{}: receive read-only stream request({})", getMemberId(), request); + final Timekeeper timer = raftServerMetrics.getClientRequestTimer(request.getType()); + final Optional timerContext = Optional.ofNullable(timer).map(Timekeeper::time); + return readAsync(request, r -> { + try { + return processQueryFuture(stateMachine.data().streamReadOnly(r, stream), r); + } catch (UnsupportedOperationException e) { + return queryStateMachine(r); + } + }).whenComplete((clientReply, exception) -> { + timerContext.ifPresent(Timekeeper.Context::stop); + if (exception != null || clientReply.getException() != null) { + raftServerMetrics.incFailedRequestCount(request.getType()); + } + }); + }, request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC), CompletionException::new), clientExecutor).thenCompose(Function.identity()); } - @Override public CompletableFuture submitClientRequestAsync( RaftClientRequest request) throws IOException { @@ -985,32 +1001,6 @@ public CompletableFuture submitClientRequestAsync( () -> submitClientRequestAsyncInternal(request), request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC); } - - public CompletableFuture streamReadOnlyAsync( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { - return TraceServer.traceAsyncMethod( - () -> streamReadOnlyAsyncInternal(request, stream), - request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC); - } - - private CompletableFuture streamReadOnlyAsyncInternal( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) throws IOException { - assertLifeCycleState(LifeCycle.States.RUNNING); - if (!request.is(TypeCase.READ)) { - throw new IOException("Expected a read-only request but got " + request); - } - - LOG.debug("{}: receive read-only stream request({})", getMemberId(), request); - final Timekeeper timer = raftServerMetrics.getClientRequestTimer(request.getType()); - final Optional timerContext = Optional.ofNullable(timer).map(Timekeeper::time); - return readAsync(request, r -> streamReadOnlyStateMachine(r, stream)).whenComplete((clientReply, exception) -> { - timerContext.ifPresent(Timekeeper.Context::stop); - if (exception != null || clientReply.getException() != null) { - raftServerMetrics.incFailedRequestCount(request.getType()); - } - }); - } - private CompletableFuture submitClientRequestAsyncInternal( RaftClientRequest request) throws IOException { assertLifeCycleState(LifeCycle.States.RUNNING); @@ -1027,7 +1017,6 @@ private CompletableFuture submitClientRequestAsyncInternal( private CompletableFuture replyFuture(RaftClientRequest request) throws IOException { retryCache.invalidateRepliedRequests(request); - final TypeCase type = request.getType().getTypeCase(); switch (type) { case STALEREAD: @@ -1045,7 +1034,6 @@ private CompletableFuture replyFuture(RaftClientRequest request throw new IllegalStateException("Unexpected request type: " + type + ", request=" + request); } } - private CompletableFuture writeAsync(RaftClientRequest request) throws IOException { final CompletableFuture future = writeAsyncImpl(request); if (request.is(TypeCase.WRITE)) { @@ -1057,13 +1045,11 @@ private CompletableFuture writeAsync(RaftClientRequest request) } return future; } - private CompletableFuture writeAsyncImpl(RaftClientRequest request) throws IOException { final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } - // query the retry cache final RetryCacheImpl.CacheQueryResult queryResult = retryCache.queryCache(request); final CacheEntry cacheEntry = queryResult.getEntry(); @@ -1081,7 +1067,6 @@ private CompletableFuture writeAsyncImpl(RaftClientRequest requ final RaftClientReply exceptionReply = newExceptionReply(request, e); return failWithReply(exceptionReply, cacheEntry, context); } - try { return appendTransaction(request, context, cacheEntry); } catch (Exception e) { @@ -1089,17 +1074,14 @@ private CompletableFuture writeAsyncImpl(RaftClientRequest requ throw e; } } - private CompletableFuture watchAsync(RaftClientRequest request) { if (OrderedAsync.DUMMY.getContent().equals(request.getMessage().getContent())) { return CompletableFuture.completedFuture(RaftClientReply.newBuilder().setRequest(request).build()); } - final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } - return role.getLeaderState() .map(ls -> ls.addWatchRequest(request)) .orElseGet(() -> CompletableFuture.completedFuture( @@ -1150,54 +1132,47 @@ private CompletableFuture readAsync( if (request.getType().getRead().getPreferNonLinearizable() || readOption == RaftServerConfigKeys.Read.Option.DEFAULT) { final CompletableFuture reply = checkLeaderState(request); - if (reply != null) { - return reply; - } - return query.apply(request); - } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE){ - final LeaderStateImpl leader = role.getLeaderState().orElse(null); - final CompletableFuture replyFuture; - if (leader != null) { - replyFuture = getReadIndex(request, leader); - } else { - replyFuture = sendReadIndexAsync(request).thenApply(reply -> { + if (reply != null) { + return reply; + } + return query.apply(request); + } + if (readOption != RaftServerConfigKeys.Read.Option.LINEARIZABLE) { + throw new IllegalStateException("Unexpected read option: " + readOption); + } + final LeaderStateImpl leader = role.getLeaderState().orElse(null); + final CompletableFuture replyFuture = leader != null ? getReadIndex(request, leader) + : sendReadIndexAsync(request).thenApply(reply -> { if (reply.getServerReply().getSuccess()) { return reply.getReadIndex(); - } else { - throw new CompletionException(new ReadIndexException(getId() + - ": Failed to get read index from the leader: " + reply)); } + throw new CompletionException(new ReadIndexException(getId() + + ": Failed to get read index from the leader: " + reply)); }); - } - - return replyFuture - .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, - () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) - .thenCompose(readIndex -> query.apply(request)) - .exceptionally(e -> readException2Reply(request, e)); - } else { - throw new IllegalStateException("Unexpected read option: " + readOption); - } + return replyFuture + .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, + () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) + .thenCompose(readIndex -> query.apply(request)) + .exceptionally(e -> readException2Reply(request, e)); } private RaftClientReply readException2Reply(RaftClientRequest request, Throwable e) { e = JavaUtils.unwrapCompletionException(e); - if (e instanceof StateMachineException ) { + if (e instanceof StateMachineException) { return newExceptionReply(request, (StateMachineException) e); - } else if (e instanceof ReadException) { + } + if (e instanceof ReadException) { return newExceptionReply(request, (ReadException) e); - } else if (e instanceof ReadIndexException) { + } + if (e instanceof ReadIndexException) { return newExceptionReply(request, (ReadIndexException) e); - } else { - throw new CompletionException(e); } + throw new CompletionException(e); } - private CompletableFuture messageStreamAsync(RaftClientRequest request) throws IOException { final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } - if (request.getType().getMessageStream().getEndOfRequest()) { final CompletableFuture f = streamEndOfRequestAsync(request); if (f.isCompletedExceptionally()) { @@ -1206,32 +1181,19 @@ private CompletableFuture messageStreamAsync(RaftClientRequest // the message stream has ended and the request become a WRITE request return replyFuture(f.join()); } - return role.getLeaderState() .map(ls -> ls.streamAsync(request)) .orElseGet(() -> CompletableFuture.completedFuture( newExceptionReply(request, generateNotLeaderException()))); } - private CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) { return role.getLeaderState() .map(ls -> ls.streamEndOfRequestAsync(request)) .orElse(null); } - CompletableFuture queryStateMachine(RaftClientRequest request) { return processQueryFuture(stateMachine.query(request.getMessage()), request); } - - CompletableFuture streamReadOnlyStateMachine( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { - try { - return processQueryFuture(stateMachine.data().streamReadOnly(request, stream), request); - } catch (UnsupportedOperationException e) { - return queryStateMachine(request); - } - } - CompletableFuture processQueryFuture( CompletableFuture queryFuture, RaftClientRequest request) { return queryFuture.thenApply(r -> newReplyBuilder(request).setSuccess().setMessage(r).build()) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java index 8acb0c4e0c..5638735b23 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java @@ -461,7 +461,7 @@ public CompletableFuture submitClientRequestAsync(RaftClientReq @Override public CompletableFuture streamReadOnlyAsync( - RaftClientRequest request, StateMachine.ReadOnlyDataStream stream) { + RaftClientRequest request, StateMachine.DataChannel stream) { return getImplFuture(request.getRaftGroupId()) .thenCompose(impl -> impl.executeStreamReadOnlyAsync(request, stream)); } From 44c26cc0e7c95e18cbe87cff893f10dd3fec4909 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 26 May 2026 14:24:59 +0800 Subject: [PATCH 3/9] RATIS-1240. Remove client-side read stream changes --- .../ratis/client/DataStreamClientRpc.java | 8 - .../ratis/client/api/DataStreamApi.java | 11 -- .../ratis/client/api/DataStreamInput.java | 44 ----- .../ratis/client/impl/ClientProtoUtils.java | 17 +- .../client/impl/DataStreamClientImpl.java | 111 +---------- .../impl/DataStreamReplyByteBuf.java | 174 ------------------ .../ratis/protocol/DataStreamReply.java | 8 +- .../ratis/netty/NettyDataStreamUtils.java | 28 +-- .../netty/client/NettyClientStreamRpc.java | 132 +------------ .../netty/server/NettyServerStreamRpc.java | 15 +- .../datastream/DataStreamClusterTests.java | 45 ----- .../ratis/datastream/DataStreamTestUtils.java | 17 +- 12 files changed, 27 insertions(+), 583 deletions(-) delete mode 100644 ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java delete mode 100644 ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java diff --git a/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java b/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java index fd5bd9538e..a9bcd9d58a 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/DataStreamClientRpc.java @@ -24,7 +24,6 @@ import java.io.Closeable; import java.util.concurrent.CompletableFuture; -import java.util.function.Consumer; /** * A client interface for sending stream requests. @@ -37,11 +36,4 @@ default CompletableFuture streamAsync(DataStreamRequest request throw new UnsupportedOperationException(getClass() + " does not support " + JavaUtils.getCurrentStackTraceElement().getMethodName()); } - - /** Async call to send a request and receive multiple replies for the request. */ - default CompletableFuture streamAsync( - DataStreamRequest request, Consumer replyConsumer) { - throw new UnsupportedOperationException(getClass() + " does not support " - + JavaUtils.getCurrentStackTraceElement().getMethodName()); - } } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java index 85f237e664..9e5e2438cb 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamApi.java @@ -50,15 +50,4 @@ default DataStreamOutput stream() { /** Create a stream by providing a customized header message and route table. */ DataStreamOutput stream(ByteBuffer headerMessage, RoutingTable routingTable); - - /** - * Create a stream to read data for readonly requests. - * This corresponds to {@link AsyncApi#sendReadOnly(org.apache.ratis.protocol.Message)}. - */ - default DataStreamInput streamReadOnly() { - return streamReadOnly(null); - } - - /** Create a stream by providing a customized header message for readonly requests. */ - DataStreamInput streamReadOnly(ByteBuffer message); } diff --git a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java b/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java deleted file mode 100644 index 4f56aa9d57..0000000000 --- a/ratis-client/src/main/java/org/apache/ratis/client/api/DataStreamInput.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.ratis.client.api; - -import org.apache.ratis.protocol.DataStreamReply; -import org.apache.ratis.protocol.RaftClientReply; - -import java.io.Closeable; -import java.util.concurrent.CompletableFuture; - -/** - * An asynchronous input stream supporting zero buffer copying. - */ -public interface DataStreamInput extends Closeable { - /** - * Read the next chunk in the stream asynchronously. - * - * @return a future of the reply. - */ - CompletableFuture readAsync(); - - /** - * Return the future of the {@link RaftClientReply} - * which will be received once the read-only stream has received a reply. - * - * @return the future of the {@link RaftClientReply}. - */ - CompletableFuture getRaftClientReplyFuture(); -} diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java index f6c06bd323..d2146a521f 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/ClientProtoUtils.java @@ -18,7 +18,6 @@ package org.apache.ratis.client.impl; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; import org.apache.ratis.proto.RaftProtos.AlreadyClosedExceptionProto; import org.apache.ratis.proto.RaftProtos.ClientMessageEntryProto; import org.apache.ratis.proto.RaftProtos.GroupAddRequestProto; @@ -379,24 +378,22 @@ static GroupInfoReplyProto toGroupInfoReplyProto(GroupInfoReply reply) { return b.build(); } - public static RaftClientReply getRaftClientReply(DataStreamReply reply) { - try { - if (reply instanceof DataStreamReplyByteBuffer) { - return toRaftClientReply(((DataStreamReplyByteBuffer) reply).slice()); - } else if (reply instanceof DataStreamReplyByteBuf) { - return toRaftClientReply(((DataStreamReplyByteBuf) reply).slice().nioBuffer()); - } + static RaftClientReply getRaftClientReply(DataStreamReply reply) { + if (!(reply instanceof DataStreamReplyByteBuffer)) { throw new IllegalStateException("Unexpected " + reply.getClass() + ": reply is " + reply); + } + try { + return toRaftClientReply(((DataStreamReplyByteBuffer) reply).slice()); } catch (InvalidProtocolBufferException e) { throw new IllegalStateException("Failed to getRaftClientReply from " + reply, e); } } - public static RaftClientReply toRaftClientReply(ByteBuffer buffer) throws InvalidProtocolBufferException { + static RaftClientReply toRaftClientReply(ByteBuffer buffer) throws InvalidProtocolBufferException { return toRaftClientReply(RaftClientReplyProto.parseFrom(buffer)); } - public static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) { + static RaftClientReply toRaftClientReply(RaftClientReplyProto replyProto) { final RaftRpcReplyProto rp = replyProto.getRpcReply(); final RaftGroupMemberId serverMemberId = ProtoUtils.toRaftGroupMemberId(rp.getReplyId(), rp.getRaftGroupId()); diff --git a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java index 82d6859194..313131cbda 100644 --- a/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java +++ b/ratis-client/src/main/java/org/apache/ratis/client/impl/DataStreamClientImpl.java @@ -23,11 +23,9 @@ import org.apache.ratis.client.DataStreamClientRpc; import org.apache.ratis.client.DataStreamOutputRpc; import org.apache.ratis.client.RaftClient; -import org.apache.ratis.client.api.DataStreamInput; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamPacketByteBuffer; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.io.FilePositionCount; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -36,17 +34,16 @@ import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.DataStreamRequestHeader; -import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; -import org.apache.ratis.protocol.RoutingTable; import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.rpc.CallId; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.IOUtils; +import org.apache.ratis.protocol.*; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedSupplier; import org.apache.ratis.util.Preconditions; @@ -57,12 +54,10 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; -import java.util.ArrayDeque; import java.util.Arrays; import java.util.Collections; import java.util.Objects; import java.util.Optional; -import java.util.Queue; import java.util.concurrent.CompletableFuture; /** @@ -242,93 +237,6 @@ private CompletableFuture sendForward(DataStreamReply writeRepl } } - public final class DataStreamInputImpl implements DataStreamInput { - private final RaftClientRequest header; - private final CompletableFuture replyFuture; - private final CompletableFuture raftClientReplyFuture = new CompletableFuture<>(); - private final Queue replies = new ArrayDeque<>(); - private final Queue> pendingReads = new ArrayDeque<>(); - private Throwable readException; - private boolean closed; - - private DataStreamInputImpl(RaftClientRequest request) { - this.header = request; - final ByteBuffer buffer = ClientProtoUtils.toRaftClientRequestProtoByteBuffer(header); - final DataStreamRequestHeader h = new DataStreamRequestHeader(header.getClientId(), Type.STREAM_HEADER, - header.getCallId(), 0, buffer.remaining(), StandardWriteOption.FLUSH, StandardWriteOption.CLOSE); - this.replyFuture = dataStreamClientRpc.streamAsync(new DataStreamRequestByteBuffer(h, buffer), this::receive); - replyFuture.thenApply(ClientProtoUtils::getRaftClientReply) - .whenComplete(JavaUtils.asBiConsumer(raftClientReplyFuture)); - replyFuture.whenComplete((reply, exception) -> { - if (exception != null) { - failReads(exception); - } - }); - } - - private void receive(DataStreamReply reply) { - final CompletableFuture pending; - synchronized (this) { - if (closed) { - reply.release(); - return; - } - pending = pendingReads.poll(); - if (pending == null) { - replies.add(reply); - return; - } - } - pending.complete(reply); - } - - private void failReads(Throwable t) { - for (;;) { - final CompletableFuture pending; - synchronized (this) { - readException = t; - pending = pendingReads.poll(); - if (pending == null) { - return; - } - } - pending.completeExceptionally(t); - } - } - - @Override - public synchronized CompletableFuture readAsync() { - if (closed) { - return JavaUtils.completeExceptionally(new AlreadyClosedException( - clientId + ": stream already closed, request=" + header)); - } - final DataStreamReply reply = replies.poll(); - if (reply != null) { - return CompletableFuture.completedFuture(reply); - } - if (readException != null) { - return JavaUtils.completeExceptionally(readException); - } - final CompletableFuture f = new CompletableFuture<>(); - pendingReads.add(f); - return f; - } - - @Override - public CompletableFuture getRaftClientReplyFuture() { - return raftClientReplyFuture; - } - - @Override - public synchronized void close() { - closed = true; - for (DataStreamReply reply; (reply = replies.poll()) != null;) { - reply.release(); - } - failReads(new AlreadyClosedException(clientId + ": stream already closed, request=" + header)); - } - } - @Override public DataStreamClientRpc getClientRpc() { return dataStreamClientRpc; @@ -366,21 +274,6 @@ public DataStreamOutputRpc stream(ByteBuffer headerMessage, RoutingTable routing return new DataStreamOutputImpl(request); } - @Override - public DataStreamInput streamReadOnly(ByteBuffer headerMessage) { - final Message message = - Optional.ofNullable(headerMessage).map(ByteString::copyFrom).map(Message::valueOf).orElse(null); - final RaftClientRequest request = RaftClientRequest.newBuilder() - .setClientId(clientId) - .setServerId(dataStreamServer.getId()) - .setGroupId(groupId) - .setCallId(CallId.getAndIncrement()) - .setMessage(message) - .setType(RaftClientRequest.readRequestType()) - .build(); - return new DataStreamInputImpl(request); - } - @Override public void close() throws IOException { dataStreamClientRpc.close(); diff --git a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java b/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java deleted file mode 100644 index d94d3f0c33..0000000000 --- a/ratis-common/src/main/java/org/apache/ratis/datastream/impl/DataStreamReplyByteBuf.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.ratis.datastream.impl; - -import org.apache.ratis.proto.RaftProtos.CommitInfoProto; -import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; -import org.apache.ratis.protocol.ClientId; -import org.apache.ratis.protocol.DataStreamPacket; -import org.apache.ratis.protocol.DataStreamReply; -import org.apache.ratis.protocol.DataStreamReplyHeader; -import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; -import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; - -import java.util.Collection; -import java.util.Collections; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicReference; - -/** - * Implements {@link DataStreamReply} with {@link ByteBuf}. - */ -public final class DataStreamReplyByteBuf extends DataStreamPacketImpl implements DataStreamReply { - public static final class Builder { - private ClientId clientId; - private Type type; - private long streamId; - private long streamOffset; - private ByteBuf buffer; - - private boolean success; - private long bytesWritten; - private Collection commitInfos; - - private Builder() { - } - - public Builder setClientId(ClientId clientId) { - this.clientId = clientId; - return this; - } - - public Builder setType(Type type) { - this.type = type; - return this; - } - - public Builder setStreamId(long streamId) { - this.streamId = streamId; - return this; - } - - public Builder setStreamOffset(long streamOffset) { - this.streamOffset = streamOffset; - return this; - } - - public Builder setBuffer(ByteBuf buffer) { - this.buffer = buffer; - return this; - } - - public Builder setSuccess(boolean success) { - this.success = success; - return this; - } - - public Builder setBytesWritten(long bytesWritten) { - this.bytesWritten = bytesWritten; - return this; - } - - public Builder setCommitInfos(Collection commitInfos) { - this.commitInfos = commitInfos; - return this; - } - - public Builder setDataStreamReplyHeader(DataStreamReplyHeader header) { - return setDataStreamPacket(header) - .setSuccess(header.isSuccess()) - .setBytesWritten(header.getBytesWritten()) - .setCommitInfos(header.getCommitInfos()); - } - - public Builder setDataStreamPacket(DataStreamPacket packet) { - return setClientId(packet.getClientId()) - .setType(packet.getType()) - .setStreamId(packet.getStreamId()) - .setStreamOffset(packet.getStreamOffset()); - } - - public DataStreamReplyByteBuf build() { - return new DataStreamReplyByteBuf( - clientId, type, streamId, streamOffset, buffer, success, bytesWritten, commitInfos); - } - } - - public static Builder newBuilder() { - return new Builder(); - } - - private final AtomicReference buffer; - private final boolean success; - private final long bytesWritten; - private final Collection commitInfos; - - @SuppressWarnings("parameternumber") - private DataStreamReplyByteBuf(ClientId clientId, Type type, long streamId, long streamOffset, ByteBuf buffer, - boolean success, long bytesWritten, Collection commitInfos) { - super(clientId, type, streamId, streamOffset); - this.buffer = new AtomicReference<>(buffer != null ? buffer.asReadOnly() : Unpooled.EMPTY_BUFFER); - this.success = success; - this.bytesWritten = bytesWritten; - this.commitInfos = commitInfos != null ? commitInfos : Collections.emptyList(); - } - - private ByteBuf getBuffer() { - return Optional.ofNullable(buffer.get()).orElseThrow( - () -> new IllegalStateException("buffer is already released in " + this)); - } - - @Override - public long getDataLength() { - return getBuffer().readableBytes(); - } - - public ByteBuf slice() { - return getBuffer().slice(); - } - - @Override - public boolean isSuccess() { - return success; - } - - @Override - public long getBytesWritten() { - return bytesWritten; - } - - @Override - public Collection getCommitInfos() { - return commitInfos; - } - - @Override - public void release() { - final ByteBuf got = buffer.getAndSet(null); - if (got != null && got != Unpooled.EMPTY_BUFFER) { - got.release(); - } - } - - @Override - public String toString() { - return super.toString() - + "," + (success ? "SUCCESS" : "FAILED") - + ",bytesWritten=" + bytesWritten; - } -} diff --git a/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java b/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java index 8c6c4466cf..459aee363c 100644 --- a/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java +++ b/ratis-common/src/main/java/org/apache/ratis/protocol/DataStreamReply.java @@ -30,10 +30,4 @@ public interface DataStreamReply extends DataStreamPacket { /** @return the commit information when the reply is created. */ Collection getCommitInfos(); - - /** - * Release resources owned by this reply. - */ - default void release() { - } -} +} \ No newline at end of file diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java index d160bd1354..583d6e3e94 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyDataStreamUtils.java @@ -18,7 +18,6 @@ package org.apache.ratis.netty; import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.FilePositionCount; @@ -31,7 +30,6 @@ import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.DataStreamPacketHeader; import org.apache.ratis.protocol.DataStreamReplyHeader; -import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.DataStreamRequest; import org.apache.ratis.protocol.DataStreamRequestHeader; import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; @@ -82,7 +80,7 @@ static ByteBuffer getDataStreamRequestHeaderProtoByteBuffer(DataStreamRequest re .asReadOnlyByteBuffer(); } - static ByteBuffer getDataStreamReplyHeaderProtoByteBuf(DataStreamReply reply) { + static ByteBuffer getDataStreamReplyHeaderProtoByteBuf(DataStreamReplyByteBuffer reply) { DataStreamPacketHeaderProto.Builder b = DataStreamPacketHeaderProto .newBuilder() .setClientId(reply.getClientId().toByteString()) @@ -226,30 +224,6 @@ static DataStreamReplyByteBuffer decodeDataStreamReplyByteBuffer(ByteBuf buf) { .orElse(null); } - static DataStreamReplyByteBuffer toDataStreamReplyByteBuffer(DataStreamReplyByteBuf reply) { - try { - return DataStreamReplyByteBuffer.newBuilder() - .setDataStreamPacket(reply) - .setBuffer(copy(reply.slice())) - .setSuccess(reply.isSuccess()) - .setBytesWritten(reply.getBytesWritten()) - .setCommitInfos(reply.getCommitInfos()) - .build(); - } finally { - reply.release(); - } - } - - static DataStreamReplyByteBuf decodeDataStreamReplyByteBuf(ByteBuf buf) { - return Optional.ofNullable(decodeDataStreamReplyHeader(buf)) - .map(header -> checkHeader(header, buf)) - .map(header -> DataStreamReplyByteBuf.newBuilder() - .setDataStreamReplyHeader(header) - .setBuffer(decodeData(buf, header, ByteBuf::retainedSlice)) - .build()) - .orElse(null); - } - static DataStreamReplyHeader decodeDataStreamReplyHeader(ByteBuf buf) { if (DataStreamPacketHeader.getSizeOfHeaderLen() > buf.readableBytes()) { return null; diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java index 3716b2a760..2bfeea31e1 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/client/NettyClientStreamRpc.java @@ -23,7 +23,6 @@ import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuffer; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; import org.apache.ratis.datastream.impl.DataStreamRequestFilePositionCount; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.io.WriteOption; @@ -56,7 +55,6 @@ import org.apache.ratis.thirdparty.io.netty.handler.codec.ByteToMessageDecoder; import org.apache.ratis.thirdparty.io.netty.handler.codec.MessageToMessageEncoder; import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext; -import org.apache.ratis.thirdparty.io.netty.util.concurrent.ScheduledFuture; import org.apache.ratis.util.JavaUtils; import org.apache.ratis.util.MemoizedFunction; import org.apache.ratis.util.MemoizedSupplier; @@ -75,13 +73,10 @@ import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; @@ -325,66 +320,10 @@ synchronized boolean shouldFlush(int countMin, SizeInBytes bytesMin, DataStreamR } } - static class ReadOnlyStreamingReply { - private final NettyClientReplies.RequestEntry terminalEntry; - private final CompletableFuture replyFuture; - private final Consumer replyConsumer; - private Supplier> timeoutScheduler; - private ScheduledFuture timeoutFuture; - - ReadOnlyStreamingReply(DataStreamRequest request, CompletableFuture replyFuture, - Consumer replyConsumer) { - this.terminalEntry = new NettyClientReplies.RequestEntry(request); - this.replyFuture = replyFuture; - this.replyConsumer = replyConsumer; - } - - synchronized boolean receiveReply(DataStreamReply reply) { - NettyClientReplies.ReplyEntry.cancel(timeoutFuture); - final boolean terminal = !reply.isSuccess() || terminalEntry.equals(new NettyClientReplies.RequestEntry(reply)); - final DataStreamReply replyToComplete = terminal && reply instanceof DataStreamReplyByteBuf ? - NettyDataStreamUtils.toDataStreamReplyByteBuffer((DataStreamReplyByteBuf) reply) : reply; - try { - replyConsumer.accept(replyToComplete); - } catch (Throwable t) { - if (replyToComplete == reply) { - reply.release(); - } - completeExceptionally(t); - return true; - } - - if (terminal) { - replyFuture.complete(replyToComplete); - return true; - } - scheduleTimeout(); - return false; - } - - synchronized void completeExceptionally(Throwable t) { - NettyClientReplies.ReplyEntry.cancel(timeoutFuture); - replyFuture.completeExceptionally(t); - } - - synchronized void scheduleTimeout(Supplier> scheduleMethod) { - timeoutScheduler = scheduleMethod; - scheduleTimeout(); - } - - private void scheduleTimeout() { - if (!replyFuture.isDone() && timeoutScheduler != null) { - timeoutFuture = timeoutScheduler.get(); - } - } - } - private final String name; private final Connection connection; private final NettyClientReplies replies = new NettyClientReplies(); - private final ConcurrentMap readOnlyStreamingReplies - = new ConcurrentHashMap<>(); private final TimeDuration requestTimeout; private final TimeDuration closeTimeout; @@ -422,35 +361,17 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { LOG.debug("{}: read {}", name, reply); final ClientInvocationId clientInvocationId = ClientInvocationId.valueOf( reply.getClientId(), reply.getStreamId()); - final ReadOnlyStreamingReply readOnlyStreamingReply = readOnlyStreamingReplies.get(clientInvocationId); - if (readOnlyStreamingReply != null) { - try { - if (readOnlyStreamingReply.receiveReply(reply)) { - readOnlyStreamingReplies.remove(clientInvocationId, readOnlyStreamingReply); - } - } catch (Throwable cause) { - LOG.warn("{} : channelRead error:", name, cause); - readOnlyStreamingReplies.remove(clientInvocationId, readOnlyStreamingReply); - readOnlyStreamingReply.completeExceptionally(cause); - } - return; - } - final NettyClientReplies.ReplyMap replyMap = replies.getReplyMap(clientInvocationId); if (replyMap == null) { LOG.error("{}: {} replyMap not found for reply: {}", name, clientInvocationId, reply); - reply.release(); return; } try { - final DataStreamReply replyToReceive = reply instanceof DataStreamReplyByteBuf ? - NettyDataStreamUtils.toDataStreamReplyByteBuffer((DataStreamReplyByteBuf) reply) : reply; - replyMap.receiveReply(replyToReceive); + replyMap.receiveReply(reply); } catch (Throwable cause) { LOG.warn("{} : channelRead error:", name, cause); replyMap.completeExceptionally(cause); - reply.release(); } } @@ -535,7 +456,7 @@ static ByteToMessageDecoder newDecoder() { @Override protected void decode(ChannelHandlerContext context, ByteBuf buf, List out) { - Optional.ofNullable(NettyDataStreamUtils.decodeDataStreamReplyByteBuf(buf)).ifPresent(out::add); + Optional.ofNullable(NettyDataStreamUtils.decodeDataStreamReplyByteBuffer(buf)).ifPresent(out::add); } }; } @@ -586,55 +507,6 @@ public CompletableFuture streamAsync(DataStreamRequest request) return f; } - @Override - public CompletableFuture streamAsync( - DataStreamRequest request, Consumer replyConsumer) { - final CompletableFuture f = new CompletableFuture<>(); - final ClientInvocationId clientInvocationId = ClientInvocationId.valueOf(request.getClientId(), - request.getStreamId()); - final ReadOnlyStreamingReply replyEntry = new ReadOnlyStreamingReply(request, f, replyConsumer); - if (readOnlyStreamingReplies.putIfAbsent(clientInvocationId, replyEntry) != null) { - f.completeExceptionally(new AlreadyClosedException(this + ": A read-only stream already exists for " - + clientInvocationId)); - return f; - } - - final ChannelFuture channelFuture; - final Channel channel; - LOG.debug("{}: write read-only stream begin {}", this, request); - synchronized (replyEntry) { - channel = connection.getChannelUninterruptibly(); - if (channel == null) { - readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); - f.completeExceptionally(new AlreadyClosedException(this + ": Failed to send " + request)); - return f; - } - final Function writeMethod = outstandingRequests.shouldFlush( - flushRequestCountMin, flushRequestBytesMin, request)? channel::writeAndFlush: channel::write; - channelFuture = writeMethod.apply(request); - } - channelFuture.addListener(future -> { - if (!future.isSuccess()) { - readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); - final IOException e = new IOException(this + ": Failed to send " + request + " to " + channel.remoteAddress(), - future.cause()); - replyEntry.completeExceptionally(e); - LOG.error("Channel write failed", e); - } else { - LOG.debug("{}: write read-only stream after {}", this, request); - - replyEntry.scheduleTimeout(() -> channel.eventLoop().schedule(() -> { - if (!f.isDone()) { - readOnlyStreamingReplies.remove(clientInvocationId, replyEntry); - replyEntry.completeExceptionally(new TimeoutIOException( - "Timeout " + requestTimeout + ": Failed to send " + request + " via channel " + channel)); - } - }, requestTimeout.getDuration(), requestTimeout.getUnit())); - } - }); - return f; - } - @Override public void close() { final boolean flush = outstandingRequests.shouldFlush(0, SizeInBytes.ZERO, null); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java index f222fbb84b..451040bb62 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java @@ -31,7 +31,6 @@ import org.apache.ratis.netty.metrics.NettyServerStreamRpcMetrics; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.DataStreamPacket; -import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.security.TlsConf; @@ -281,19 +280,13 @@ protected void decode(ChannelHandlerContext context, ByteBuf buf, List o }; } - static final MessageToMessageEncoder ENCODER = new Encoder(); + static final MessageToMessageEncoder ENCODER = new Encoder(); @ChannelHandler.Sharable - static class Encoder extends MessageToMessageEncoder { + static class Encoder extends MessageToMessageEncoder { @Override - protected void encode(ChannelHandlerContext context, DataStreamReply reply, List out) { - if (reply instanceof DataStreamReplyByteBuffer) { - NettyDataStreamUtils.encodeDataStreamReplyByteBuffer( - (DataStreamReplyByteBuffer) reply, out::add, context.alloc()); - } else { - throw new IllegalStateException("Unexpected reply class " - + reply.getClass()); - } + protected void encode(ChannelHandlerContext context, DataStreamReplyByteBuffer reply, List out) { + NettyDataStreamUtils.encodeDataStreamReplyByteBuffer(reply, out::add, context.alloc()); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java index 5dd4c810ed..dabc93dda2 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamClusterTests.java @@ -18,7 +18,6 @@ package org.apache.ratis.datastream; import org.apache.ratis.BaseTest; -import org.apache.ratis.client.api.DataStreamInput; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RoutingTable; @@ -27,15 +26,12 @@ import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.datastream.DataStreamTestUtils.MultiDataStreamStateMachine; import org.apache.ratis.datastream.DataStreamTestUtils.SingleDataStream; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuf; -import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.proto.RaftProtos.ReplicationLevel; import org.apache.ratis.protocol.DataStreamReply; import org.apache.ratis.protocol.RaftClientReply; import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.server.RaftServer; -import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.util.CollectionUtils; import org.apache.ratis.util.FileUtils; import org.apache.ratis.util.Timestamp; @@ -44,7 +40,6 @@ import org.junit.jupiter.api.Test; import java.io.File; -import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; import java.util.Collection; @@ -76,11 +71,6 @@ public void testStreamWithInvalidRoutingTable() throws Exception { runWithNewCluster(NUM_SERVERS, this::runTestInvalidPrimaryInRoutingTable); } - @Test - public void testStreamReadOnly() throws Exception { - runWithNewCluster(NUM_SERVERS, this::runTestStreamReadOnly); - } - void testStreamWrites(CLUSTER cluster) throws Exception { waitForLeader(cluster); runTestDataStreamOutput(cluster); @@ -115,41 +105,6 @@ void runTestDataStreamOutput(CLUSTER cluster) throws Exception { assertLogEntry(cluster, request); } - void runTestStreamReadOnly(CLUSTER cluster) throws Exception { - final RaftPeer primaryServer = waitForLeader(cluster).getPeer(); - final ByteString query = ByteString.copyFromUtf8("stream-read-only"); - try (RaftClient client = cluster.createClient(primaryServer); - DataStreamInput in = client.getDataStreamApi().streamReadOnly(query.asReadOnlyByteBuffer())) { - for (int i = 0; i < MultiDataStreamStateMachine.READ_ONLY_STREAM_CHUNKS; i++) { - final ByteString chunk = MultiDataStreamStateMachine.getReadOnlyStreamChunk(query, i); - final DataStreamReply data = in.readAsync().join(); - DataStreamTestUtils.assertSuccessReply(Type.STREAM_DATA, chunk.size(), data); - Assertions.assertEquals(chunk, toByteString(data)); - } - - final DataStreamReply reply = in.readAsync().join(); - DataStreamTestUtils.assertSuccessReply(Type.STREAM_HEADER, 0, reply); - - final RaftClientReply clientReply = in.getRaftClientReplyFuture().join(); - Assertions.assertTrue(clientReply.isSuccess()); - Assertions.assertEquals(query, clientReply.getMessage().getContent()); - } - } - - private static ByteString toByteString(DataStreamReply reply) { - try { - if (reply instanceof DataStreamReplyByteBuffer) { - final ByteBuffer buffer = ((DataStreamReplyByteBuffer) reply).slice(); - return ByteString.copyFrom(buffer); - } else if (reply instanceof DataStreamReplyByteBuf) { - return ByteString.copyFrom(((DataStreamReplyByteBuf) reply).slice().nioBuffer()); - } - throw new AssertionError("Unexpected reply " + reply); - } finally { - reply.release(); - } - } - void runTestInvalidPrimaryInRoutingTable(CLUSTER cluster) throws Exception { final RaftPeer primaryServer = CollectionUtils.random(cluster.getGroup().getPeers()); diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java index ead4661811..b34cf758d2 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java @@ -42,7 +42,6 @@ import org.apache.ratis.server.raftlog.LogProtoUtils; import org.apache.ratis.server.raftlog.RaftLog; import org.apache.ratis.statemachine.StateMachine.DataChannel; -import org.apache.ratis.statemachine.StateMachine.ReadOnlyDataStream; import org.apache.ratis.statemachine.StateMachine.DataStream; import org.apache.ratis.statemachine.TransactionContext; import org.apache.ratis.statemachine.impl.BaseStateMachine; @@ -58,6 +57,7 @@ import org.slf4j.LoggerFactory; import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; @@ -189,13 +189,16 @@ public CompletableFuture query(Message request) { } @Override - public CompletableFuture streamReadOnly(RaftClientRequest request, ReadOnlyDataStream stream) { - CompletableFuture writes = CompletableFuture.completedFuture(null); - for (int i = 0; i < READ_ONLY_STREAM_CHUNKS; i++) { - final ByteString chunk = getReadOnlyStreamChunk(request.getMessage().getContent(), i); - writes = writes.thenCompose(ignored -> stream.writeAsync(chunk.asReadOnlyByteBuffer())); + public CompletableFuture streamReadOnly(RaftClientRequest request, DataChannel stream) { + try { + for (int i = 0; i < READ_ONLY_STREAM_CHUNKS; i++) { + final ByteString chunk = getReadOnlyStreamChunk(request.getMessage().getContent(), i); + stream.write(chunk.asReadOnlyByteBuffer()); + } + return CompletableFuture.completedFuture(Message.valueOf(getId().toByteString())); + } catch (IOException e) { + return JavaUtils.completeExceptionally(e); } - return writes.thenApply(ignored -> request.getMessage()); } SingleDataStream getSingleDataStream(RaftClientRequest request) { From 55cf4e537e78c6eda01ca7a1b1d0e5abc40a6bfe Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 26 May 2026 16:36:21 +0800 Subject: [PATCH 4/9] chore Signed-off-by: peterxcli --- .../ratis/server/impl/RaftServerImpl.java | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index 7b6f34d7cb..f6e37622e0 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -964,11 +964,13 @@ CompletableFuture executeSubmitServerRequestAsync( () -> JavaUtils.callAsUnchecked(submitFunction, CompletionException::new), serverExecutor).join(); } + CompletableFuture executeSubmitClientRequestAsync(RaftClientRequest request) { return CompletableFuture.supplyAsync( () -> JavaUtils.callAsUnchecked(() -> submitClientRequestAsync(request), CompletionException::new), clientExecutor).join(); } + CompletableFuture executeStreamReadOnlyAsync( RaftClientRequest request, StateMachine.DataChannel stream) { return CompletableFuture.supplyAsync(() -> JavaUtils.callAsUnchecked(() -> TraceServer.traceAsyncMethod(() -> { @@ -1001,6 +1003,7 @@ public CompletableFuture submitClientRequestAsync( () -> submitClientRequestAsyncInternal(request), request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC); } + private CompletableFuture submitClientRequestAsyncInternal( RaftClientRequest request) throws IOException { assertLifeCycleState(LifeCycle.States.RUNNING); @@ -1017,6 +1020,7 @@ private CompletableFuture submitClientRequestAsyncInternal( private CompletableFuture replyFuture(RaftClientRequest request) throws IOException { retryCache.invalidateRepliedRequests(request); + final TypeCase type = request.getType().getTypeCase(); switch (type) { case STALEREAD: @@ -1034,6 +1038,7 @@ private CompletableFuture replyFuture(RaftClientRequest request throw new IllegalStateException("Unexpected request type: " + type + ", request=" + request); } } + private CompletableFuture writeAsync(RaftClientRequest request) throws IOException { final CompletableFuture future = writeAsyncImpl(request); if (request.is(TypeCase.WRITE)) { @@ -1045,11 +1050,13 @@ private CompletableFuture writeAsync(RaftClientRequest request) } return future; } + private CompletableFuture writeAsyncImpl(RaftClientRequest request) throws IOException { final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } + // query the retry cache final RetryCacheImpl.CacheQueryResult queryResult = retryCache.queryCache(request); final CacheEntry cacheEntry = queryResult.getEntry(); @@ -1067,6 +1074,7 @@ private CompletableFuture writeAsyncImpl(RaftClientRequest requ final RaftClientReply exceptionReply = newExceptionReply(request, e); return failWithReply(exceptionReply, cacheEntry, context); } + try { return appendTransaction(request, context, cacheEntry); } catch (Exception e) { @@ -1074,14 +1082,17 @@ private CompletableFuture writeAsyncImpl(RaftClientRequest requ throw e; } } + private CompletableFuture watchAsync(RaftClientRequest request) { if (OrderedAsync.DUMMY.getContent().equals(request.getMessage().getContent())) { return CompletableFuture.completedFuture(RaftClientReply.newBuilder().setRequest(request).build()); } + final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } + return role.getLeaderState() .map(ls -> ls.addWatchRequest(request)) .orElseGet(() -> CompletableFuture.completedFuture( @@ -1157,22 +1168,23 @@ private CompletableFuture readAsync( } private RaftClientReply readException2Reply(RaftClientRequest request, Throwable e) { e = JavaUtils.unwrapCompletionException(e); - if (e instanceof StateMachineException) { + if (e instanceof StateMachineException ) { return newExceptionReply(request, (StateMachineException) e); - } - if (e instanceof ReadException) { + } else if (e instanceof ReadException) { return newExceptionReply(request, (ReadException) e); - } - if (e instanceof ReadIndexException) { + } else if (e instanceof ReadIndexException) { return newExceptionReply(request, (ReadIndexException) e); + } else { + throw new CompletionException(e); } - throw new CompletionException(e); } + private CompletableFuture messageStreamAsync(RaftClientRequest request) throws IOException { final CompletableFuture reply = checkLeaderState(request); if (reply != null) { return reply; } + if (request.getType().getMessageStream().getEndOfRequest()) { final CompletableFuture f = streamEndOfRequestAsync(request); if (f.isCompletedExceptionally()) { @@ -1181,19 +1193,23 @@ private CompletableFuture messageStreamAsync(RaftClientRequest // the message stream has ended and the request become a WRITE request return replyFuture(f.join()); } + return role.getLeaderState() .map(ls -> ls.streamAsync(request)) .orElseGet(() -> CompletableFuture.completedFuture( newExceptionReply(request, generateNotLeaderException()))); } + private CompletableFuture streamEndOfRequestAsync(RaftClientRequest request) { return role.getLeaderState() .map(ls -> ls.streamEndOfRequestAsync(request)) .orElse(null); } + CompletableFuture queryStateMachine(RaftClientRequest request) { return processQueryFuture(stateMachine.query(request.getMessage()), request); } + CompletableFuture processQueryFuture( CompletableFuture queryFuture, RaftClientRequest request) { return queryFuture.thenApply(r -> newReplyBuilder(request).setSuccess().setMessage(r).build()) From ad8cbe25e9c4bea662856ff81c5d4e47ce9fccf8 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Tue, 26 May 2026 16:57:13 +0800 Subject: [PATCH 5/9] suppress FileLength for RaftServerImpl Signed-off-by: peterxcli --- dev-support/checkstyle.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev-support/checkstyle.xml b/dev-support/checkstyle.xml index db4954fb49..f7e168b029 100644 --- a/dev-support/checkstyle.xml +++ b/dev-support/checkstyle.xml @@ -60,6 +60,10 @@ + + + + From a6b4b15c4eb101fcfd8b6e77ee7a52cf574f477c Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 29 May 2026 17:59:42 +0800 Subject: [PATCH 6/9] suggestion Signed-off-by: peterxcli --- .../netty/server/DataStreamManagement.java | 97 ++++++++++--------- .../org/apache/ratis/server/RaftServer.java | 15 --- .../ratis/statemachine/StateMachine.java | 13 ++- .../ratis/server/impl/RaftServerImpl.java | 25 ----- .../ratis/server/impl/RaftServerProxy.java | 7 -- .../ratis/datastream/DataStreamTestUtils.java | 13 ++- 6 files changed, 64 insertions(+), 106 deletions(-) diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java index b0b93b7754..f3cb7ab94d 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java @@ -568,60 +568,61 @@ private static RaftClientRequest toRaftClientRequest(DataStreamRequestByteBuf re private CompletableFuture submitReadOnlyRequest(DataStreamRequestByteBuf request, RaftClientRequest raftClientRequest, ChannelHandlerContext ctx) { - try { - final StateMachine.DataChannel readOnlyDataStream = new StateMachine.DataChannel() { - private long streamOffset; - private boolean closed; + final DataChannel readOnlyDataStream = new DataChannel() { + private long streamOffset; + private boolean closed; - @Override - public synchronized boolean isOpen() { - return !closed; - } + @Override + public synchronized boolean isOpen() { + return !closed; + } - @Override - public synchronized void close() { - closed = true; - } + @Override + public synchronized void close() { + closed = true; + } - @Override - public synchronized void force(boolean metadata) throws IOException { - if (!isOpen()) { - throw new AlreadyClosedException("Channel closed at offset " + streamOffset); - } - ctx.flush(); + @Override + public synchronized void force(boolean metadata) throws IOException { + if (!isOpen()) { + throw new AlreadyClosedException("Channel closed at offset " + streamOffset); } + ctx.flush(); + } - @Override - public synchronized int write(ByteBuffer buffer) throws IOException { - if (!isOpen()) { - throw new AlreadyClosedException("Channel closed at offset " + streamOffset); - } - final int length = buffer.remaining(); - final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); - final ChannelFuture future = ctx.writeAndFlush(reply); - try { - future.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new InterruptedIOException( - "Interrupted while writing " + length + " bytes at offset " + streamOffset); - } - if (!future.isSuccess()) { - final Throwable cause = future.cause(); - if (cause instanceof IOException) { - throw (IOException) cause; - } - throw new IOException("Failed to write " + length + " bytes at offset " + streamOffset, cause); - } - streamOffset += length; - return length; + @Override + public synchronized int write(ByteBuffer buffer) throws IOException { + if (!isOpen()) { + throw new AlreadyClosedException("Channel closed at offset " + streamOffset); } - }; - return server.streamReadOnlyAsync(raftClientRequest, readOnlyDataStream) - .thenCompose(reply -> writeAndFlush(ctx, newDataStreamReplyByteBuffer(request, reply))); - } catch (IOException e) { - return JavaUtils.completeExceptionally(e); - } + final int length = buffer.remaining(); + final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); + final ChannelFuture future = ctx.writeAndFlush(reply); + try { + future.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new InterruptedIOException( + "Interrupted while writing " + length + " bytes at offset " + streamOffset); + } + if (!future.isSuccess()) { + throw new IOException("Failed to write " + length + " bytes at offset " + streamOffset, future.cause()); + } + streamOffset += length; + return length; + } + }; + + return CompletableFuture.supplyAsync(() -> JavaUtils.callAsUnchecked(() -> { + final Division division = server.getDivision(raftClientRequest.getRaftGroupId()); + division.getStateMachine().data().query(raftClientRequest.getMessage(), readOnlyDataStream); + return RaftClientReply.newBuilder() + .setRequest(raftClientRequest) + .setSuccess() + .setCommitInfos(division.getCommitInfos()) + .build(); + }, CompletionException::new), requestExecutor) + .thenCompose(reply -> writeAndFlush(ctx, newDataStreamReplyByteBuffer(request, reply))); } static void assertReplyCorrespondingToRequest( diff --git a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java index e6287cc6d4..84e3a1ed30 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/server/RaftServer.java @@ -24,7 +24,6 @@ import java.util.Collection; import java.util.Objects; import java.util.Optional; -import java.util.concurrent.CompletableFuture; import org.apache.ratis.conf.Parameters; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.proto.RaftProtos.CommitInfoProto; @@ -33,8 +32,6 @@ import org.apache.ratis.protocol.AdminProtocol; import org.apache.ratis.protocol.RaftClientAsynchronousProtocol; import org.apache.ratis.protocol.RaftClientProtocol; -import org.apache.ratis.protocol.RaftClientReply; -import org.apache.ratis.protocol.RaftClientRequest; import org.apache.ratis.protocol.RaftGroup; import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftGroupMemberId; @@ -153,18 +150,6 @@ default RaftGroup getGroup() { /** @return the data stream rpc service. */ DataStreamServerRpc getDataStreamServerRpc(); - /** - * Submit a read-only request whose response may be streamed through the data stream RPC. - * - * @param request the read-only request - * @param stream the stream for response data chunks - * @return a future for the terminal reply - */ - default CompletableFuture streamReadOnlyAsync( - RaftClientRequest request, StateMachine.DataChannel stream) throws IOException { - throw new UnsupportedOperationException("This method is NOT supported."); - } - /** @return the {@link RpcType}. */ default RpcType getRpcType() { return getFactory().getRpcType(); diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java index 114092062b..1448af1851 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java @@ -117,15 +117,14 @@ default CompletableFuture stream(RaftClientRequest request) { } /** - * Stream a read-only state machine request. Implementations may write zero or more data - * chunks before completing the returned future with the terminal reply message. + * Similar to {@link StateMachine#query(Message)} except below: + * - In {@link StateMachine#query(Message)}, the state machine returns the result in a future. + * - In this method, the state machine sends the result using the given stream. * - * @param request the read-only client request - * @param stream the output stream for response data chunks - * @return a future for the terminal reply message + * @param request the client request + * @param stream the output stream to send the result. The state machine must close the stream at the end. */ - default CompletableFuture streamReadOnly(RaftClientRequest request, DataChannel stream) { - throw new UnsupportedOperationException("This method is NOT supported."); + default void query(Message request, DataChannel stream) { } /** diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index f6e37622e0..c2a183d6bd 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -971,31 +971,6 @@ CompletableFuture executeSubmitClientRequestAsync(RaftClientReq clientExecutor).join(); } - CompletableFuture executeStreamReadOnlyAsync( - RaftClientRequest request, StateMachine.DataChannel stream) { - return CompletableFuture.supplyAsync(() -> JavaUtils.callAsUnchecked(() -> TraceServer.traceAsyncMethod(() -> { - assertLifeCycleState(LifeCycle.States.RUNNING); - if (!request.is(TypeCase.READ)) { - throw new IOException("Expected a read-only request but got " + request); - } - LOG.debug("{}: receive read-only stream request({})", getMemberId(), request); - final Timekeeper timer = raftServerMetrics.getClientRequestTimer(request.getType()); - final Optional timerContext = Optional.ofNullable(timer).map(Timekeeper::time); - return readAsync(request, r -> { - try { - return processQueryFuture(stateMachine.data().streamReadOnly(r, stream), r); - } catch (UnsupportedOperationException e) { - return queryStateMachine(r); - } - }).whenComplete((clientReply, exception) -> { - timerContext.ifPresent(Timekeeper.Context::stop); - if (exception != null || clientReply.getException() != null) { - raftServerMetrics.incFailedRequestCount(request.getType()); - } - }); - }, request, getMemberId().toString(), SpanNames.SUBMIT_CLIENT_REQUEST_ASYNC), CompletionException::new), - clientExecutor).thenCompose(Function.identity()); - } @Override public CompletableFuture submitClientRequestAsync( RaftClientRequest request) throws IOException { diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java index 5638735b23..8539fa99ec 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerProxy.java @@ -459,13 +459,6 @@ public CompletableFuture submitClientRequestAsync(RaftClientReq .thenCompose(impl -> impl.executeSubmitClientRequestAsync(request)); } - @Override - public CompletableFuture streamReadOnlyAsync( - RaftClientRequest request, StateMachine.DataChannel stream) { - return getImplFuture(request.getRaftGroupId()) - .thenCompose(impl -> impl.executeStreamReadOnlyAsync(request, stream)); - } - @Override public RaftClientReply submitClientRequest(RaftClientRequest request) throws IOException { diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java index b34cf758d2..473f1ae273 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java @@ -189,15 +189,20 @@ public CompletableFuture query(Message request) { } @Override - public CompletableFuture streamReadOnly(RaftClientRequest request, DataChannel stream) { + public void query(Message request, DataChannel stream) { try { for (int i = 0; i < READ_ONLY_STREAM_CHUNKS; i++) { - final ByteString chunk = getReadOnlyStreamChunk(request.getMessage().getContent(), i); + final ByteString chunk = getReadOnlyStreamChunk(request.getContent(), i); stream.write(chunk.asReadOnlyByteBuffer()); } - return CompletableFuture.completedFuture(Message.valueOf(getId().toByteString())); } catch (IOException e) { - return JavaUtils.completeExceptionally(e); + throw new CompletionException(e); + } finally { + try { + stream.close(); + } catch (IOException e) { + throw new CompletionException(e); + } } } From b67215510677b299c6618b587f317124a97aa565 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 29 May 2026 18:24:36 +0800 Subject: [PATCH 7/9] add test for data stream mgmt Signed-off-by: peterxcli --- .../server/TestDataStreamManagement.java | 130 ++++++++++++++---- 1 file changed, 105 insertions(+), 25 deletions(-) diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java index 5c06ddd319..a038717d58 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java @@ -17,34 +17,127 @@ */ package org.apache.ratis.netty.server; +import org.apache.ratis.client.impl.ClientProtoUtils; import org.apache.ratis.client.impl.DataStreamClientImpl.DataStreamOutputImpl; import org.apache.ratis.conf.RaftProperties; +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; import org.apache.ratis.io.StandardWriteOption; import org.apache.ratis.netty.metrics.NettyServerStreamRpcMetrics; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.DataStreamReply; +import org.apache.ratis.protocol.Message; import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.RaftGroupId; import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.server.RaftServer; +import org.apache.ratis.statemachine.StateMachine; +import org.apache.ratis.statemachine.StateMachine.DataApi; +import org.apache.ratis.statemachine.StateMachine.DataChannel; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; import org.apache.ratis.thirdparty.io.netty.channel.ChannelInboundHandlerAdapter; import org.apache.ratis.thirdparty.io.netty.channel.embedded.EmbeddedChannel; +import org.apache.ratis.util.JavaUtils; +import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedBiFunction; import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; import java.io.IOException; -import java.lang.reflect.Proxy; +import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Set; +import java.util.concurrent.TimeUnit; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; class TestDataStreamManagement { + @Test + void readOnlyRequestInvokesStateMachineDataApiQuery() throws Exception { + final RaftPeerId serverId = RaftPeerId.valueOf("s1"); + final ClientId clientId = ClientId.randomId(); + final RaftGroupId groupId = RaftGroupId.randomId(); + final ByteString query = ByteString.copyFromUtf8("query"); + final ByteString response = ByteString.copyFromUtf8("response"); + + final StateMachine stateMachine = mock(StateMachine.class); + final DataApi dataApi = mock(DataApi.class); + when(stateMachine.data()).thenReturn(dataApi); + doAnswer(invocation -> { + final DataChannel stream = invocation.getArgument(1); + stream.write(response.asReadOnlyByteBuffer()); + stream.close(); + return null; + }).when(dataApi).query(any(Message.class), any(DataChannel.class)); + + final RaftServer.Division division = mock(RaftServer.Division.class); + when(division.getStateMachine()).thenReturn(stateMachine); + when(division.getCommitInfos()).thenReturn(Collections.emptyList()); + + final RaftServer server = newRaftServer(serverId, new RaftProperties()); + when(server.getDivision(groupId)).thenReturn(division); + final NettyServerStreamRpcMetrics metrics = new NettyServerStreamRpcMetrics("s1"); + final DataStreamManagement management = new DataStreamManagement(server, metrics); + final EmbeddedChannel embeddedChannel = new EmbeddedChannel(new ChannelInboundHandlerAdapter()); + + final RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() + .setClientId(clientId) + .setServerId(serverId) + .setGroupId(groupId) + .setCallId(1L) + .setMessage(Message.valueOf(query)) + .setType(RaftClientRequest.readRequestType()) + .build(); + final ByteBuffer header = ClientProtoUtils.toRaftClientRequestProtoByteBuffer(raftClientRequest); + final DataStreamRequestByteBuf request = new DataStreamRequestByteBuf( + clientId, + Type.STREAM_HEADER, + raftClientRequest.getCallId(), + 0L, + Collections.singletonList(StandardWriteOption.FLUSH), + Unpooled.wrappedBuffer(header)); + final CheckedBiFunction, Set, IOException> getStreams = + (r, p) -> Collections.emptySet(); + + try { + management.read(request, embeddedChannel.pipeline().firstContext(), getStreams); + + final List replies = new ArrayList<>(); + JavaUtils.attempt(() -> { + for (Object outbound; (outbound = embeddedChannel.readOutbound()) != null;) { + replies.add((DataStreamReply) outbound); + } + assertEquals(2, replies.size()); + }, 10, TimeDuration.valueOf(100, TimeUnit.MILLISECONDS), "read-only replies", null); + + final ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(Message.class); + final ArgumentCaptor streamCaptor = ArgumentCaptor.forClass(DataChannel.class); + verify(dataApi).query(messageCaptor.capture(), streamCaptor.capture()); + assertEquals(query, messageCaptor.getValue().getContent()); + assertFalse(streamCaptor.getValue().isOpen(), "state machine should close the streaming query channel"); + assertSuccessReply(Type.STREAM_DATA, response.size(), replies.get(0)); + assertSuccessReply(Type.STREAM_HEADER, 0, replies.get(1)); + } finally { + embeddedChannel.finishAndReleaseAll(); + management.shutdown(); + } + } + @Test void readCleansChannelMapOnEarlyException() throws Exception { // Scenario: STREAM_DATA arrives without prior STREAM_HEADER, so readImpl fails early. @@ -85,30 +178,17 @@ void readCleansChannelMapOnEarlyException() throws Exception { } } + private static void assertSuccessReply(Type expectedType, long expectedBytesWritten, DataStreamReply reply) { + assertEquals(expectedType, reply.getType()); + assertTrue(reply.isSuccess()); + assertEquals(expectedBytesWritten, reply.getBytesWritten()); + assertTrue(reply instanceof DataStreamReplyByteBuffer); + } + private static RaftServer newRaftServer(RaftPeerId serverId, RaftProperties properties) { - return (RaftServer) Proxy.newProxyInstance(TestDataStreamManagement.class.getClassLoader(), - new Class[]{RaftServer.class}, - (proxy, method, args) -> { - if (method.getDeclaringClass() == Object.class) { - switch (method.getName()) { - case "toString": - return "RaftServerProxy(" + serverId + ")"; - case "hashCode": - return System.identityHashCode(proxy); - case "equals": - return proxy == args[0]; - default: - return null; - } - } - switch (method.getName()) { - case "getId": - return serverId; - case "getProperties": - return properties; - default: - throw new UnsupportedOperationException("Unexpected RaftServer call: " + method); - } - }); + final RaftServer server = mock(RaftServer.class); + when(server.getId()).thenReturn(serverId); + when(server.getProperties()).thenReturn(properties); + return server; } } From d9b4431de32d1fd6911f56ba76eac4e1ed52f6ce Mon Sep 17 00:00:00 2001 From: peterxcli Date: Fri, 29 May 2026 18:47:37 +0800 Subject: [PATCH 8/9] revert raft server impl Signed-off-by: peterxcli --- .../ratis/server/impl/RaftServerImpl.java | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java index c2a183d6bd..f758fd0edc 100644 --- a/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java +++ b/ratis-server/src/main/java/org/apache/ratis/server/impl/RaftServerImpl.java @@ -1110,36 +1110,37 @@ private CompletableFuture getReadIndex(RaftClientRequest request, LeaderSt return writeIndexCache.getWriteIndexFuture(request).thenCompose(leader::getReadIndex); } private CompletableFuture readAsync(RaftClientRequest request) { - return readAsync(request, this::queryStateMachine); - } - - private CompletableFuture readAsync( - RaftClientRequest request, Function> query) { if (request.getType().getRead().getPreferNonLinearizable() || readOption == RaftServerConfigKeys.Read.Option.DEFAULT) { final CompletableFuture reply = checkLeaderState(request); - if (reply != null) { - return reply; - } - return query.apply(request); - } - if (readOption != RaftServerConfigKeys.Read.Option.LINEARIZABLE) { - throw new IllegalStateException("Unexpected read option: " + readOption); - } - final LeaderStateImpl leader = role.getLeaderState().orElse(null); - final CompletableFuture replyFuture = leader != null ? getReadIndex(request, leader) - : sendReadIndexAsync(request).thenApply(reply -> { + if (reply != null) { + return reply; + } + return queryStateMachine(request); + } else if (readOption == RaftServerConfigKeys.Read.Option.LINEARIZABLE){ + final LeaderStateImpl leader = role.getLeaderState().orElse(null); + final CompletableFuture replyFuture; + if (leader != null) { + replyFuture = getReadIndex(request, leader); + } else { + replyFuture = sendReadIndexAsync(request).thenApply(reply -> { if (reply.getServerReply().getSuccess()) { return reply.getReadIndex(); + } else { + throw new CompletionException(new ReadIndexException(getId() + + ": Failed to get read index from the leader: " + reply)); } - throw new CompletionException(new ReadIndexException(getId() - + ": Failed to get read index from the leader: " + reply)); }); - return replyFuture - .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, - () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) - .thenCompose(readIndex -> query.apply(request)) - .exceptionally(e -> readException2Reply(request, e)); + } + + return replyFuture + .thenCompose(readIndex -> getState().getReadRequests().waitToAdvance(readIndex, + () -> getReadException("add", snapshotInstallationHandler.getInProgressInstallSnapshotIndex(), false))) + .thenCompose(readIndex -> queryStateMachine(request)) + .exceptionally(e -> readException2Reply(request, e)); + } else { + throw new IllegalStateException("Unexpected read option: " + readOption); + } } private RaftClientReply readException2Reply(RaftClientRequest request, Throwable e) { e = JavaUtils.unwrapCompletionException(e); From 611b62aa13b624fb7df25a33139ad9dee5bbc675 Mon Sep 17 00:00:00 2001 From: peterxcli Date: Sat, 30 May 2026 09:13:04 +0800 Subject: [PATCH 9/9] suggestion Co-authored-by: Tsz-Wo Nicholas Sze Signed-off-by: peterxcli --- dev-support/checkstyle.xml | 4 - .../netty/server/DataStreamManagement.java | 114 ------------- .../netty/server/NettyServerStreamRpc.java | 6 + .../netty/server/ReadStreamManagement.java | 158 ++++++++++++++++++ .../ratis/statemachine/StateMachine.java | 10 +- .../ratis/datastream/DataStreamTestUtils.java | 24 ++- .../server/TestDataStreamManagement.java | 127 +++++++++----- 7 files changed, 269 insertions(+), 174 deletions(-) create mode 100644 ratis-netty/src/main/java/org/apache/ratis/netty/server/ReadStreamManagement.java diff --git a/dev-support/checkstyle.xml b/dev-support/checkstyle.xml index f7e168b029..db4954fb49 100644 --- a/dev-support/checkstyle.xml +++ b/dev-support/checkstyle.xml @@ -60,10 +60,6 @@ - - - - diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java index f3cb7ab94d..4e337e06b2 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/DataStreamManagement.java @@ -33,7 +33,6 @@ import org.apache.ratis.proto.RaftProtos.CommitInfoProto; import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto; -import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto.TypeCase; import org.apache.ratis.protocol.ClientId; import org.apache.ratis.protocol.ClientInvocationId; import org.apache.ratis.protocol.DataStreamReply; @@ -43,7 +42,6 @@ import org.apache.ratis.protocol.RaftPeer; import org.apache.ratis.protocol.RaftPeerId; import org.apache.ratis.protocol.RoutingTable; -import org.apache.ratis.protocol.exceptions.AlreadyClosedException; import org.apache.ratis.protocol.exceptions.AlreadyExistsException; import org.apache.ratis.protocol.exceptions.DataStreamException; import org.apache.ratis.server.RaftConfiguration; @@ -55,7 +53,6 @@ import org.apache.ratis.statemachine.StateMachine.DataChannel; import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; -import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; import org.apache.ratis.util.ConcurrentUtils; @@ -71,7 +68,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.InterruptedIOException; import java.nio.ByteBuffer; import java.util.Collection; import java.util.Collections; @@ -360,32 +356,6 @@ static DataStreamReplyByteBuffer newDataStreamReplyByteBuffer(DataStreamRequestB .build(); } - static DataStreamReplyByteBuffer newDataStreamReadOnlyReplyByteBuffer(DataStreamRequestByteBuf request, - long streamOffset, ByteBuffer buffer) { - final ByteBuffer readOnlyBuffer = buffer.asReadOnlyBuffer(); - return DataStreamReplyByteBuffer.newBuilder() - .setClientId(request.getClientId()) - .setType(Type.STREAM_DATA) - .setStreamId(request.getStreamId()) - .setStreamOffset(streamOffset) - .setBuffer(readOnlyBuffer) - .setSuccess(true) - .setBytesWritten(readOnlyBuffer.remaining()) - .build(); - } - - private static CompletableFuture writeAndFlush(ChannelHandlerContext ctx, DataStreamReply reply) { - final CompletableFuture future = new CompletableFuture<>(); - ctx.writeAndFlush(reply).addListener(channelFuture -> { - if (channelFuture.isSuccess()) { - future.complete(null); - } else { - future.completeExceptionally(channelFuture.cause()); - } - }); - return future; - } - private void sendReply(List> remoteWrites, DataStreamRequestByteBuf request, long bytesWritten, Collection commitInfos, ChannelHandlerContext ctx) { @@ -481,23 +451,6 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct // add to ChannelMap channels.add(channelId, key); - if (request.getType() == Type.STREAM_HEADER) { - final RaftClientRequest raftClientRequest = toRaftClientRequest(request); - if (raftClientRequest.is(TypeCase.READ)) { - submitReadOnlyRequest(request, raftClientRequest, ctx).whenComplete((v, exception) -> { - try { - if (exception != null) { - replyDataStreamException(server, exception, raftClientRequest, request, ctx); - } - } finally { - request.release(); - channels.remove(channelId, key); - } - }); - return; - } - } - final StreamInfo info; if (request.getType() == Type.STREAM_HEADER) { final MemoizedSupplier supplier = JavaUtils.memoize( @@ -558,73 +511,6 @@ private void readImpl(DataStreamRequestByteBuf request, ChannelHandlerContext ct }); } - private static RaftClientRequest toRaftClientRequest(DataStreamRequestByteBuf request) { - try { - return ClientProtoUtils.toRaftClientRequest(RaftClientRequestProto.parseFrom(request.slice().nioBuffer())); - } catch (Throwable e) { - throw new CompletionException(e); - } - } - - private CompletableFuture submitReadOnlyRequest(DataStreamRequestByteBuf request, - RaftClientRequest raftClientRequest, ChannelHandlerContext ctx) { - final DataChannel readOnlyDataStream = new DataChannel() { - private long streamOffset; - private boolean closed; - - @Override - public synchronized boolean isOpen() { - return !closed; - } - - @Override - public synchronized void close() { - closed = true; - } - - @Override - public synchronized void force(boolean metadata) throws IOException { - if (!isOpen()) { - throw new AlreadyClosedException("Channel closed at offset " + streamOffset); - } - ctx.flush(); - } - - @Override - public synchronized int write(ByteBuffer buffer) throws IOException { - if (!isOpen()) { - throw new AlreadyClosedException("Channel closed at offset " + streamOffset); - } - final int length = buffer.remaining(); - final DataStreamReplyByteBuffer reply = newDataStreamReadOnlyReplyByteBuffer(request, streamOffset, buffer); - final ChannelFuture future = ctx.writeAndFlush(reply); - try { - future.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new InterruptedIOException( - "Interrupted while writing " + length + " bytes at offset " + streamOffset); - } - if (!future.isSuccess()) { - throw new IOException("Failed to write " + length + " bytes at offset " + streamOffset, future.cause()); - } - streamOffset += length; - return length; - } - }; - - return CompletableFuture.supplyAsync(() -> JavaUtils.callAsUnchecked(() -> { - final Division division = server.getDivision(raftClientRequest.getRaftGroupId()); - division.getStateMachine().data().query(raftClientRequest.getMessage(), readOnlyDataStream); - return RaftClientReply.newBuilder() - .setRequest(raftClientRequest) - .setSuccess() - .setCommitInfos(division.getCommitInfos()) - .build(); - }, CompletionException::new), requestExecutor) - .thenCompose(reply -> writeAndFlush(ctx, newDataStreamReplyByteBuffer(request, reply))); - } - static void assertReplyCorrespondingToRequest( final DataStreamRequestByteBuf request, final DataStreamReply reply) { Preconditions.assertTrue(request.getClientId().equals(reply.getClientId())); diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java index 451040bb62..24303d867e 100644 --- a/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/NettyServerStreamRpc.java @@ -152,6 +152,7 @@ void close() { private final ChannelFuture channelFuture; private final DataStreamManagement requests; + private final ReadStreamManagement reads; private final ProxiesPool proxies; private final NettyServerStreamRpcMetrics metrics; @@ -162,6 +163,7 @@ public NettyServerStreamRpc(RaftServer server, Parameters parameters) { this.name = server.getId() + "-" + JavaUtils.getClassSimpleName(getClass()); this.metrics = new NettyServerStreamRpcMetrics(this.name); this.requests = new DataStreamManagement(server, metrics); + this.reads = new ReadStreamManagement(server); final RaftProperties properties = server.getProperties(); @@ -235,6 +237,9 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) { final DataStreamRequestByteBuf request = (DataStreamRequestByteBuf)msg; try(UncheckedAutoCloseable autoReset = requestRef.set(request)) { + if (reads.process(request, ctx)) { + return; + } requests.read(request, ctx, proxies.get(request)::getDataStreamOutput); } } @@ -248,6 +253,7 @@ public void channelInactive(ChannelHandlerContext ctx) { public void exceptionCaught(ChannelHandlerContext ctx, Throwable throwable) { Optional.ofNullable(requestRef.getAndSetNull()) .ifPresent(request -> requests.replyDataStreamException(throwable, request, ctx)); + ctx.close(); } }; } diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/server/ReadStreamManagement.java b/ratis-netty/src/main/java/org/apache/ratis/netty/server/ReadStreamManagement.java new file mode 100644 index 0000000000..bcdced1eac --- /dev/null +++ b/ratis-netty/src/main/java/org/apache/ratis/netty/server/ReadStreamManagement.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.ratis.netty.server; + +import org.apache.ratis.datastream.impl.DataStreamReplyByteBuffer; +import org.apache.ratis.datastream.impl.DataStreamRequestByteBuf; +import org.apache.ratis.proto.RaftProtos.DataStreamPacketHeaderProto.Type; +import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto; +import org.apache.ratis.proto.RaftProtos.RaftClientRequestProto.TypeCase; +import org.apache.ratis.protocol.ClientId; +import org.apache.ratis.protocol.RaftClientRequest; +import org.apache.ratis.protocol.exceptions.AlreadyClosedException; +import org.apache.ratis.server.RaftServer; +import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture; +import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; +import org.apache.ratis.util.JavaUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.concurrent.CompletableFuture; + +import static org.apache.ratis.client.impl.ClientProtoUtils.toRaftClientRequest; +import static org.apache.ratis.netty.server.DataStreamManagement.replyDataStreamException; + +public class ReadStreamManagement { + public static final Logger LOG = LoggerFactory.getLogger(ReadStreamManagement.class); + + static class ReadStream implements WritableByteChannel { + private final ClientId clientId; + private final long streamId; + private final ChannelHandlerContext ctx; + private final CompletableFuture closed = new CompletableFuture<>(); + private long streamOffset; + + ReadStream(DataStreamRequestByteBuf request, ChannelHandlerContext ctx) { + this.clientId = request.getClientId(); + this.streamId = request.getStreamId(); + this.ctx = ctx; + } + + @Override + public boolean isOpen() { + return !closed.isDone(); + } + + @Override + public void close() { + closed.complete(null); + } + + @Override + public synchronized int write(ByteBuffer buffer) throws IOException { + if (!isOpen()) { + throw new AlreadyClosedException("Channel closed at offset " + streamOffset); + } + buffer = buffer.asReadOnlyBuffer(); + final int length = buffer.remaining(); + final DataStreamReplyByteBuffer reply = newReply(buffer); + final ChannelFuture future = ctx.writeAndFlush(reply); + try { + future.await(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new InterruptedIOException( + "Interrupted while writing " + length + " bytes at offset " + streamOffset); + } + if (!future.isSuccess()) { + throw new IOException("Failed to write " + length + " bytes at offset " + streamOffset, future.cause()); + } + streamOffset += length; + return length; + } + + private synchronized DataStreamReplyByteBuffer newReply(ByteBuffer buffer) { + return DataStreamReplyByteBuffer.newBuilder() + .setClientId(clientId) + .setType(Type.STREAM_DATA) + .setStreamId(streamId) + .setStreamOffset(streamOffset) + .setBuffer(buffer) + .setSuccess(true) + .setBytesWritten(buffer.remaining()) + .build(); + } + } + + private final RaftServer server; + private final String name; + + ReadStreamManagement(RaftServer server) { + this.server = server; + this.name = server.getId() + "-" + JavaUtils.getClassSimpleName(getClass()); + } + + boolean process(DataStreamRequestByteBuf requestBuf, ChannelHandlerContext ctx) { + boolean processed = false; + try { + processed = processImpl(requestBuf, ctx); + } catch (Throwable e) { + LOG.error("Failed to process {}", requestBuf, e); + processed = true; + } finally { + if (processed) { + requestBuf.release(); + } + } + return processed; + } + + private boolean processImpl(DataStreamRequestByteBuf requestBuf, ChannelHandlerContext ctx) + throws InvalidProtocolBufferException { + if (requestBuf.getType() != Type.STREAM_HEADER) { + return false; + } + final RaftClientRequest request = toRaftClientRequest( + RaftClientRequestProto.parseFrom(requestBuf.slice().nioBuffer())); + if (!request.is(TypeCase.READ)) { + return false; + } + + final RaftServer.Division division; + try { + division = server.getDivision(request.getRaftGroupId()); + } catch (IOException e) { + replyDataStreamException(server, e, request, requestBuf, ctx); + return true; + } + + final ReadStream stream = new ReadStream(requestBuf, ctx); + division.getStateMachine().data().query(request.getMessage(), stream); + return true; + } + + @Override + public String toString() { + return name; + } +} diff --git a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java index 1448af1851..61e708febb 100644 --- a/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java +++ b/ratis-server-api/src/main/java/org/apache/ratis/statemachine/StateMachine.java @@ -117,14 +117,14 @@ default CompletableFuture stream(RaftClientRequest request) { } /** - * Similar to {@link StateMachine#query(Message)} except below: - * - In {@link StateMachine#query(Message)}, the state machine returns the result in a future. - * - In this method, the state machine sends the result using the given stream. + * Similar to {@link #query(Message)} except that + * {@link #query(Message)} returns the result in a future + * while this method sends the result using the given stream. * * @param request the client request - * @param stream the output stream to send the result. The state machine must close the stream at the end. + * @param stream the output stream to send the results */ - default void query(Message request, DataChannel stream) { + default void query(Message request, WritableByteChannel stream) { } /** diff --git a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java index 473f1ae273..fe9c3f9ea1 100644 --- a/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java +++ b/ratis-test/src/test/java/org/apache/ratis/datastream/DataStreamTestUtils.java @@ -61,6 +61,7 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Collection; @@ -153,7 +154,7 @@ class MultiDataStreamStateMachine extends BaseStateMachine { private final ConcurrentMap streams = new ConcurrentHashMap<>(); static ByteString getReadOnlyStreamChunk(ByteString query, int index) { - return ByteString.copyFromUtf8(query.toStringUtf8() + "-chunk-" + index); + return query.concat(ByteString.copyFromUtf8("-chunk-" + index)); } @Override @@ -189,20 +190,25 @@ public CompletableFuture query(Message request) { } @Override - public void query(Message request, DataChannel stream) { + public void query(Message request, WritableByteChannel stream) { + CompletableFuture.supplyAsync(() -> { + try { + streamReadOnlyImpl(request, stream); + } catch (IOException e) { + throw new CompletionException("Failed to streamReadOnly for " + request, e); + } + return null; + }); + } + + private void streamReadOnlyImpl(Message request, WritableByteChannel stream) throws IOException { try { for (int i = 0; i < READ_ONLY_STREAM_CHUNKS; i++) { final ByteString chunk = getReadOnlyStreamChunk(request.getContent(), i); stream.write(chunk.asReadOnlyByteBuffer()); } - } catch (IOException e) { - throw new CompletionException(e); } finally { - try { - stream.close(); - } catch (IOException e) { - throw new CompletionException(e); - } + stream.close(); } } diff --git a/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java index a038717d58..1573a2a283 100644 --- a/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java +++ b/ratis-test/src/test/java/org/apache/ratis/netty/server/TestDataStreamManagement.java @@ -35,8 +35,9 @@ import org.apache.ratis.server.RaftServer; import org.apache.ratis.statemachine.StateMachine; import org.apache.ratis.statemachine.StateMachine.DataApi; -import org.apache.ratis.statemachine.StateMachine.DataChannel; +import org.apache.ratis.statemachine.impl.BaseStateMachine; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; +import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.thirdparty.io.netty.buffer.Unpooled; import org.apache.ratis.thirdparty.io.netty.channel.ChannelHandlerContext; import org.apache.ratis.thirdparty.io.netty.channel.ChannelId; @@ -46,53 +47,49 @@ import org.apache.ratis.util.TimeDuration; import org.apache.ratis.util.function.CheckedBiFunction; import org.junit.jupiter.api.Test; -import org.mockito.ArgumentCaptor; import java.io.IOException; +import java.lang.reflect.Proxy; import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; class TestDataStreamManagement { @Test - void readOnlyRequestInvokesStateMachineDataApiQuery() throws Exception { + void readOnlyRequestInvokesReadStreamManagement() throws Exception { final RaftPeerId serverId = RaftPeerId.valueOf("s1"); final ClientId clientId = ClientId.randomId(); final RaftGroupId groupId = RaftGroupId.randomId(); final ByteString query = ByteString.copyFromUtf8("query"); final ByteString response = ByteString.copyFromUtf8("response"); - final StateMachine stateMachine = mock(StateMachine.class); - final DataApi dataApi = mock(DataApi.class); - when(stateMachine.data()).thenReturn(dataApi); - doAnswer(invocation -> { - final DataChannel stream = invocation.getArgument(1); - stream.write(response.asReadOnlyByteBuffer()); - stream.close(); - return null; - }).when(dataApi).query(any(Message.class), any(DataChannel.class)); - - final RaftServer.Division division = mock(RaftServer.Division.class); - when(division.getStateMachine()).thenReturn(stateMachine); - when(division.getCommitInfos()).thenReturn(Collections.emptyList()); - - final RaftServer server = newRaftServer(serverId, new RaftProperties()); - when(server.getDivision(groupId)).thenReturn(division); - final NettyServerStreamRpcMetrics metrics = new NettyServerStreamRpcMetrics("s1"); - final DataStreamManagement management = new DataStreamManagement(server, metrics); + final AtomicReference messageRef = new AtomicReference<>(); + final AtomicReference streamRef = new AtomicReference<>(); + final DataApi dataApi = new DataApi() { + @Override + public void query(Message request, WritableByteChannel stream) { + messageRef.set(request); + streamRef.set(stream); + } + }; + final StateMachine stateMachine = new BaseStateMachine() { + @Override + public DataApi data() { + return dataApi; + } + }; + final RaftServer server = newRaftServer(serverId, new RaftProperties(), groupId, newDivision(stateMachine)); + final ReadStreamManagement management = new ReadStreamManagement(server); final EmbeddedChannel embeddedChannel = new EmbeddedChannel(new ChannelInboundHandlerAdapter()); final RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder() @@ -104,37 +101,37 @@ void readOnlyRequestInvokesStateMachineDataApiQuery() throws Exception { .setType(RaftClientRequest.readRequestType()) .build(); final ByteBuffer header = ClientProtoUtils.toRaftClientRequestProtoByteBuffer(raftClientRequest); + final ByteBuf headerBuf = Unpooled.wrappedBuffer(header); final DataStreamRequestByteBuf request = new DataStreamRequestByteBuf( clientId, Type.STREAM_HEADER, raftClientRequest.getCallId(), 0L, Collections.singletonList(StandardWriteOption.FLUSH), - Unpooled.wrappedBuffer(header)); - final CheckedBiFunction, Set, IOException> getStreams = - (r, p) -> Collections.emptySet(); + headerBuf); try { - management.read(request, embeddedChannel.pipeline().firstContext(), getStreams); + assertTrue(management.process(request, embeddedChannel.pipeline().firstContext())); + assertEquals(0, headerBuf.refCnt()); + + final WritableByteChannel stream = streamRef.get(); + assertNotNull(stream); + stream.write(response.asReadOnlyByteBuffer()); + stream.close(); final List replies = new ArrayList<>(); JavaUtils.attempt(() -> { for (Object outbound; (outbound = embeddedChannel.readOutbound()) != null;) { replies.add((DataStreamReply) outbound); } - assertEquals(2, replies.size()); + assertEquals(1, replies.size()); }, 10, TimeDuration.valueOf(100, TimeUnit.MILLISECONDS), "read-only replies", null); - final ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(Message.class); - final ArgumentCaptor streamCaptor = ArgumentCaptor.forClass(DataChannel.class); - verify(dataApi).query(messageCaptor.capture(), streamCaptor.capture()); - assertEquals(query, messageCaptor.getValue().getContent()); - assertFalse(streamCaptor.getValue().isOpen(), "state machine should close the streaming query channel"); + assertEquals(query, messageRef.get().getContent()); + assertFalse(streamRef.get().isOpen(), "state machine should close the streaming query channel"); assertSuccessReply(Type.STREAM_DATA, response.size(), replies.get(0)); - assertSuccessReply(Type.STREAM_HEADER, 0, replies.get(1)); } finally { embeddedChannel.finishAndReleaseAll(); - management.shutdown(); } } @@ -186,9 +183,55 @@ private static void assertSuccessReply(Type expectedType, long expectedBytesWrit } private static RaftServer newRaftServer(RaftPeerId serverId, RaftProperties properties) { - final RaftServer server = mock(RaftServer.class); - when(server.getId()).thenReturn(serverId); - when(server.getProperties()).thenReturn(properties); - return server; + return newRaftServer(serverId, properties, null, null); + } + + private static RaftServer newRaftServer(RaftPeerId serverId, RaftProperties properties, + RaftGroupId groupId, RaftServer.Division division) { + return (RaftServer) Proxy.newProxyInstance(RaftServer.class.getClassLoader(), new Class[]{RaftServer.class}, + (proxy, method, args) -> { + switch (method.getName()) { + case "getId": + return serverId; + case "getProperties": + return properties; + case "getDivision": + if (groupId != null && groupId.equals(args[0])) { + return division; + } + throw new IOException("Division not found: " + args[0]); + case "close": + return null; + case "toString": + return serverId.toString(); + case "hashCode": + return System.identityHashCode(proxy); + case "equals": + return proxy == args[0]; + default: + throw new UnsupportedOperationException(method.toString()); + } + }); + } + + private static RaftServer.Division newDivision(StateMachine stateMachine) { + return (RaftServer.Division) Proxy.newProxyInstance(RaftServer.Division.class.getClassLoader(), + new Class[]{RaftServer.Division.class}, + (proxy, method, args) -> { + switch (method.getName()) { + case "getStateMachine": + return stateMachine; + case "close": + return null; + case "toString": + return stateMachine.toString(); + case "hashCode": + return System.identityHashCode(proxy); + case "equals": + return proxy == args[0]; + default: + throw new UnsupportedOperationException(method.toString()); + } + }); } }