-
Notifications
You must be signed in to change notification settings - Fork 599
HDDS-14990. Show failed volumes in ozone admin datanode list output and SCM metrics
#10058
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f6c1b57
4ab01e6
c4d22d3
b48fb63
008d8ee
782048a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,6 +65,14 @@ public class ListInfoSubcommand extends ScmSubcommand { | |
| defaultValue = "false") | ||
| private boolean json; | ||
|
|
||
| @CommandLine.Option(names = {"--nodes-with-failed-volumes"}, | ||
| description = "Only show datanodes that have at least one failed volume.", | ||
| defaultValue = "false") | ||
| private boolean nodeWithFailedVolumes; | ||
|
|
||
| @CommandLine.Spec | ||
| private CommandLine.Model.CommandSpec spec; | ||
|
|
||
| @CommandLine.ArgGroup(exclusive = true, multiplicity = "0..1") | ||
| private ExclusiveNodeOptions exclusiveNodeOptions; | ||
|
|
||
|
|
@@ -85,14 +93,16 @@ static class ExclusiveNodeOptions extends NodeSelectionMixin { | |
|
|
||
| @Override | ||
| public void execute(ScmClient scmClient) throws IOException { | ||
| if (nodeWithFailedVolumes && exclusiveNodeOptions != null | ||
| && !Strings.isNullOrEmpty(exclusiveNodeOptions.getNodeId())) { | ||
| throw new CommandLine.ParameterException(spec.commandLine(), | ||
| "--nodes-with-failed-volumes cannot be used with --id/--node-id. " | ||
| + "Use them separately."); | ||
| } | ||
|
Comment on lines
+96
to
+101
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Picocli reports argument conflicts as CommandLine.MutuallyExclusiveArgsException (a subclass of CommandLine.ParameterException). Throwing IOException from execute() bypasses picocli's error handling entirely, producing a different error format than what the user would see from other mutual-exclusion violations. so change this to throw new CommandLine.ParameterException. (you can refer to DecommissionStatusSubCommand to see how it can be used.)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update the test as well with the correct exception. |
||
| pipelines = scmClient.listPipelines(); | ||
| if (exclusiveNodeOptions != null && !Strings.isNullOrEmpty(exclusiveNodeOptions.getNodeId())) { | ||
| HddsProtos.Node node = scmClient.queryNode(UUID.fromString(exclusiveNodeOptions.getNodeId())); | ||
| Integer totalVolumeCount = node.hasTotalVolumeCount() ? node.getTotalVolumeCount() : null; | ||
| Integer healthyVolumeCount = node.hasHealthyVolumeCount() ? node.getHealthyVolumeCount() : null; | ||
| BasicDatanodeInfo singleNodeInfo = new BasicDatanodeInfo.Builder( | ||
| DatanodeDetails.getFromProtoBuf(node.getNodeID()), node.getNodeOperationalStates(0), | ||
| node.getNodeStates(0)).withVolumeCounts(totalVolumeCount, healthyVolumeCount).build(); | ||
| BasicDatanodeInfo singleNodeInfo = new BasicDatanodeInfo.Builder(node).build(); | ||
| if (json) { | ||
| List<BasicDatanodeInfo> dtoList = Collections.singletonList(singleNodeInfo); | ||
| System.out.println(JsonUtils.toJsonStringWithDefaultPrettyPrinter(dtoList)); | ||
|
|
@@ -118,6 +128,10 @@ public void execute(ScmClient scmClient) throws IOException { | |
| allNodes = allNodes.filter(p -> p.getHealthState().toString() | ||
| .compareToIgnoreCase(nodeState) == 0); | ||
| } | ||
| if (nodeWithFailedVolumes) { | ||
| allNodes = allNodes.filter(p -> | ||
| p.getFailedVolumes() != null && !p.getFailedVolumes().isEmpty()); | ||
| } | ||
|
|
||
| if (!listLimitOptions.isAll()) { | ||
| allNodes = allNodes.limit(listLimitOptions.getLimit()); | ||
|
|
@@ -154,13 +168,9 @@ private List<BasicDatanodeInfo> getAllNodes(ScmClient scmClient) | |
| long capacity = p.getCapacity(); | ||
| long used = capacity - p.getRemaining(); | ||
| double percentUsed = (capacity > 0) ? (used * 100.0) / capacity : 0.0; | ||
| Integer totalVolumeCount = node.hasTotalVolumeCount() ? node.getTotalVolumeCount() : null; | ||
| Integer healthyVolumeCount = node.hasHealthyVolumeCount() ? node.getHealthyVolumeCount() : null; | ||
| return new BasicDatanodeInfo.Builder( | ||
| DatanodeDetails.getFromProtoBuf(node.getNodeID()), | ||
| node.getNodeOperationalStates(0), node.getNodeStates(0)) | ||
| return new BasicDatanodeInfo.Builder(node) | ||
| .withUsageInfo(used, capacity, percentUsed) | ||
| .withVolumeCounts(totalVolumeCount, healthyVolumeCount).build(); | ||
| .build(); | ||
| } catch (Exception e) { | ||
| String reason = "Could not process info for an unknown datanode"; | ||
| if (p != null && p.getNode() != null && !Strings.isNullOrEmpty(p.getNode().getUuid())) { | ||
|
|
@@ -177,12 +187,7 @@ private List<BasicDatanodeInfo> getAllNodes(ScmClient scmClient) | |
| List<HddsProtos.Node> nodes = scmClient.queryNode(null, | ||
| null, HddsProtos.QueryScope.CLUSTER, ""); | ||
|
|
||
| return nodes.stream().map(p -> { | ||
| Integer totalVolumeCount = p.hasTotalVolumeCount() ? p.getTotalVolumeCount() : null; | ||
| Integer healthyVolumeCount = p.hasHealthyVolumeCount() ? p.getHealthyVolumeCount() : null; | ||
| return new BasicDatanodeInfo.Builder( | ||
| DatanodeDetails.getFromProtoBuf(p.getNodeID()), p.getNodeOperationalStates(0), p.getNodeStates(0)) | ||
| .withVolumeCounts(totalVolumeCount, healthyVolumeCount).build(); }) | ||
| return nodes.stream().map(p -> new BasicDatanodeInfo.Builder(p).build()) | ||
| .sorted(Comparator.comparing(BasicDatanodeInfo::getHealthState)) | ||
| .collect(Collectors.toList()); | ||
| } | ||
|
|
@@ -206,10 +211,12 @@ private void printDatanodeInfo(BasicDatanodeInfo dn) { | |
| .append('/').append(p.getPipelineState().toString()).append('/') | ||
| .append(datanode.getID().equals(p.getLeaderId()) ? | ||
| "Leader" : "Follower") | ||
| .append(System.getProperty("line.separator"))); | ||
| .append('\n')); | ||
| } | ||
| } else { | ||
| pipelineListInfo.append("No pipelines in cluster."); | ||
| pipelineListInfo | ||
| .append("No pipelines in cluster.") | ||
| .append('\n'); | ||
| } | ||
| System.out.println("Datanode: " + datanode.getUuid().toString() + | ||
| " (" + datanode.getNetworkLocation() + "/" + datanode.getIpAddress() | ||
|
|
@@ -221,6 +228,12 @@ private void printDatanodeInfo(BasicDatanodeInfo dn) { | |
| System.out.println("Total volume count: " + dn.getTotalVolumeCount() + "\n" + | ||
| "Healthy volume count: " + dn.getHealthyVolumeCount()); | ||
| } | ||
| if (dn.getFailedVolumes() != null && !dn.getFailedVolumes().isEmpty()) { | ||
| System.out.println("Failed volumes:"); | ||
| for (String vol : dn.getFailedVolumes()) { | ||
| System.out.println(" " + vol); | ||
| } | ||
| } | ||
| System.out.println("Related pipelines:\n" + pipelineListInfo); | ||
|
|
||
| if (dn.getUsed() != null && dn.getCapacity() != null && dn.getUsed() >= 0 && dn.getCapacity() > 0) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The --nodes-with-failed-volumes filter is silently ignored when --node-id is used.
We should make these two options mutually exclusive to avoid confusion: if a user currently provides both, they receive the result for the node whose ID was specified, regardless of whether that node has any failed volumes.