-
Notifications
You must be signed in to change notification settings - Fork 0
<feature>[vm]: add MetadataImpact #3652
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,106 @@ | ||
| package org.zstack.compute.vm; | ||
|
|
||
| import org.springframework.beans.factory.annotation.Autowired; | ||
| import org.zstack.core.cloudbus.CloudBusCallBack; | ||
| import org.zstack.core.componentloader.PluginRegistry; | ||
| import org.zstack.core.db.Q; | ||
| import org.zstack.core.gc.GC; | ||
| import org.zstack.core.gc.GCCompletion; | ||
| import org.zstack.core.gc.TimeBasedGarbageCollector; | ||
| import org.zstack.header.host.HostVO; | ||
| import org.zstack.header.message.MessageReply; | ||
| import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg; | ||
| import org.zstack.header.storage.primary.PrimaryStorageAO_; | ||
| import org.zstack.header.storage.primary.PrimaryStorageConstant; | ||
| import org.zstack.header.storage.primary.PrimaryStorageVO; | ||
| import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint; | ||
| import org.zstack.utils.Utils; | ||
| import org.zstack.utils.logging.CLogger; | ||
|
|
||
| public class CleanupVmInstanceMetadataOnPrimaryStorageGC extends TimeBasedGarbageCollector { | ||
| private static final CLogger logger = Utils.getLogger(CleanupVmInstanceMetadataOnPrimaryStorageGC.class); | ||
|
|
||
| @Autowired | ||
| private PluginRegistry pluginRgty; | ||
|
|
||
| @GC | ||
| public String primaryStorageUuid; | ||
| @GC | ||
| public String vmUuid; | ||
| @GC | ||
| public String rootVolumeUuid; | ||
| @GC | ||
| public String metadataPath; | ||
| @GC | ||
| public String hostUuid; | ||
|
|
||
| public static String getGCName(String vmUuid, String primaryStorageUuid) { | ||
| return String.format("gc-cleanup-vm-metadata-%s-%s", vmUuid, primaryStorageUuid); | ||
| } | ||
|
|
||
| @Override | ||
| protected void triggerNow(GCCompletion completion) { | ||
| if (!dbf.isExist(primaryStorageUuid, PrimaryStorageVO.class)) { | ||
| logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] no longer exists, " + | ||
| "cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid)); | ||
| completion.cancel(); | ||
| return; | ||
| } | ||
|
|
||
| String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageAO_.type).eq(PrimaryStorageAO_.uuid, primaryStorageUuid).findValue(); | ||
| if (psType == null) { | ||
| logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] type not found, " + | ||
| "cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid)); | ||
| completion.cancel(); | ||
| return; | ||
| } | ||
|
|
||
| VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class); | ||
| boolean requireHost = ext != null && ext.requireHostForCleanup(); | ||
|
|
||
| if (hostUuid == null && requireHost) { | ||
| logger.debug(String.format("[MetadataCleanupGC] hostUuid is null and ps[uuid:%s, type:%s] " + | ||
| "requires host for cleanup, cancel gc for vm[uuid:%s]", | ||
| primaryStorageUuid, psType, vmUuid)); | ||
| completion.cancel(); | ||
| return; | ||
| } | ||
|
|
||
| if (hostUuid != null && !dbf.isExist(hostUuid, HostVO.class)) { | ||
| if (requireHost) { | ||
| logger.debug(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists " + | ||
| "and ps[uuid:%s, type:%s] requires host for cleanup, cancel gc for vm[uuid:%s]", | ||
| hostUuid, primaryStorageUuid, psType, vmUuid)); | ||
| completion.cancel(); | ||
| return; | ||
| } | ||
|
|
||
| logger.info(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists for vm[uuid:%s], " + | ||
| "clear hostUuid and let the primary storage backend pick an available host", hostUuid, vmUuid)); | ||
| hostUuid = null; | ||
| } | ||
|
|
||
| CleanupVmInstanceMetadataOnPrimaryStorageMsg msg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg(); | ||
| msg.setPrimaryStorageUuid(primaryStorageUuid); | ||
| msg.setVmInstanceUuid(vmUuid); | ||
| msg.setRootVolumeUuid(rootVolumeUuid); | ||
| msg.setMetadataPath(metadataPath); | ||
| msg.setHostUuid(hostUuid); | ||
|
|
||
| bus.makeTargetServiceIdByResourceUuid(msg, PrimaryStorageConstant.SERVICE_ID, primaryStorageUuid); | ||
| bus.send(msg, new CloudBusCallBack(completion) { | ||
| @Override | ||
| public void run(MessageReply reply) { | ||
| if (reply.isSuccess()) { | ||
| logger.info(String.format("[MetadataCleanupGC] successfully cleaned up metadata " + | ||
| "for vm[uuid:%s] on ps[uuid:%s]", vmUuid, primaryStorageUuid)); | ||
| completion.success(); | ||
| } else { | ||
| logger.warn(String.format("[MetadataCleanupGC] failed to clean up metadata " + | ||
| "for vm[uuid:%s] on ps[uuid:%s]: %s", vmUuid, primaryStorageUuid, reply.getError())); | ||
| completion.fail(reply.getError()); | ||
| } | ||
| } | ||
| }); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| package org.zstack.compute.vm; | ||
|
|
||
| import org.springframework.beans.factory.annotation.Autowire; | ||
| import org.springframework.beans.factory.annotation.Autowired; | ||
| import org.springframework.beans.factory.annotation.Configurable; | ||
| import org.zstack.core.cloudbus.CloudBus; | ||
| import org.zstack.core.cloudbus.CloudBusCallBack; | ||
| import org.zstack.core.componentloader.PluginRegistry; | ||
| import org.zstack.core.db.Q; | ||
| import org.zstack.header.core.workflow.FlowTrigger; | ||
| import org.zstack.header.core.workflow.NoRollbackFlow; | ||
| import org.zstack.header.message.MessageReply; | ||
| import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg; | ||
| import org.zstack.header.storage.primary.PrimaryStorageConstant; | ||
| import org.zstack.header.storage.primary.PrimaryStorageVO; | ||
| import org.zstack.header.storage.primary.PrimaryStorageVO_; | ||
| import org.zstack.header.vm.VmInstanceConstant; | ||
| import org.zstack.header.vm.VmInstanceSpec; | ||
| import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint; | ||
| import org.zstack.header.volume.VolumeInventory; | ||
| import org.zstack.utils.Utils; | ||
| import org.zstack.utils.logging.CLogger; | ||
|
|
||
| import java.util.Map; | ||
| import java.util.concurrent.TimeUnit; | ||
|
|
||
| @Configurable(preConstruction = true, autowire = Autowire.BY_TYPE) | ||
| public class VmExpungeMetadataFlow extends NoRollbackFlow { | ||
| private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class); | ||
|
|
||
| @Autowired | ||
| private CloudBus bus; | ||
| @Autowired | ||
| private PluginRegistry pluginRgty; | ||
|
|
||
| @Override | ||
| public void run(FlowTrigger trigger, Map data) { | ||
| final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString()); | ||
| if (spec == null || spec.getVmInventory() == null) { | ||
| logger.warn("[MetadataExpunge] missing VmInstanceSpec or VmInventory, skip metadata cleanup"); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
|
|
||
| final String vmUuid = spec.getVmInventory().getUuid(); | ||
|
|
||
| VolumeInventory rootVolume = spec.getVmInventory().getRootVolume(); | ||
| String psUuid = rootVolume != null ? rootVolume.getPrimaryStorageUuid() : null; | ||
| if (psUuid == null) { | ||
| logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume has no primaryStorageUuid, " + | ||
| "skipping metadata cleanup", vmUuid)); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
|
|
||
|
|
||
| String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue(); | ||
| if (psType == null) { | ||
| logger.warn(String.format("[MetadataExpunge] primary storage[uuid:%s] not found for vm[uuid:%s], " + | ||
| "skip metadata cleanup", psUuid, vmUuid)); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
|
|
||
| VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class); | ||
| if (ext == null) { | ||
| logger.warn(String.format("[MetadataExpunge] no VmMetadataPathBuildExtensionPoint found for ps[uuid:%s, type:%s], " + | ||
| "skip metadata cleanup", psUuid, psType)); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
| final String metadataPath; | ||
| try { | ||
| metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid); | ||
| } catch (Exception e) { | ||
| logger.warn(String.format("[MetadataExpunge] failed to build metadata path for vm[uuid:%s] on ps[uuid:%s], " + | ||
| "skip metadata cleanup: %s", vmUuid, psUuid, e.getMessage())); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
|
|
||
| String hostUuid = spec.getVmInventory().getHostUuid(); | ||
| if (hostUuid == null) { | ||
| hostUuid = spec.getVmInventory().getLastHostUuid(); | ||
| } | ||
|
|
||
| if (hostUuid == null && ext.requireHostForCleanup()) { | ||
| logger.warn(String.format("[MetadataExpunge] vm[uuid:%s] hostUuid is null, " + | ||
| "ps[uuid:%s, type:%s] requires host for cleanup, skip without submitting GC", | ||
| vmUuid, psUuid, psType)); | ||
| trigger.next(); | ||
| return; | ||
| } | ||
|
Comment on lines
+87
to
+93
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 缺少 host 时不要直接放弃 metadata cleanup。 这是 expunge 链里的 cleanup flow。对 🤖 Prompt for AI Agents |
||
|
|
||
| String rootVolumeUuid = rootVolume.getUuid(); | ||
| CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg(); | ||
| cmsg.setPrimaryStorageUuid(psUuid); | ||
| cmsg.setVmInstanceUuid(vmUuid); | ||
| cmsg.setMetadataPath(metadataPath); | ||
| cmsg.setRootVolumeUuid(rootVolumeUuid); | ||
| cmsg.setHostUuid(hostUuid); | ||
| final String finalPsUuid = psUuid; | ||
| final String finalHostUuid = hostUuid; | ||
|
|
||
| bus.makeTargetServiceIdByResourceUuid(cmsg, PrimaryStorageConstant.SERVICE_ID, psUuid); | ||
| bus.send(cmsg, new CloudBusCallBack(trigger) { | ||
| @Override | ||
| public void run(MessageReply reply) { | ||
| if (reply.isSuccess()) { | ||
| logger.info(String.format("[MetadataExpunge] successfully deleted metadata for vm[uuid:%s] on ps[uuid:%s]", | ||
| vmUuid, finalPsUuid)); | ||
| } else { | ||
| logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s]: %s, " + | ||
| "submitting GC job for retry", vmUuid, finalPsUuid, reply.getError())); | ||
| submitGC(finalPsUuid, vmUuid, rootVolumeUuid, metadataPath, finalHostUuid); | ||
| } | ||
| trigger.next(); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| private void submitGC(String psUuid, String vmUuid, String rootVolumeUuid, String metadataPath, String hostUuid) { | ||
| CleanupVmInstanceMetadataOnPrimaryStorageGC gc = new CleanupVmInstanceMetadataOnPrimaryStorageGC(); | ||
| gc.NAME = CleanupVmInstanceMetadataOnPrimaryStorageGC.getGCName(vmUuid, psUuid); | ||
| gc.primaryStorageUuid = psUuid; | ||
| gc.vmUuid = vmUuid; | ||
| gc.rootVolumeUuid = rootVolumeUuid; | ||
| gc.metadataPath = metadataPath; | ||
| gc.hostUuid = hostUuid; | ||
| long gcIntervalSec = TimeUnit.HOURS.toSeconds(VmGlobalConfig.VM_METADATA_CLEANUP_GC_INTERVAL.value(Long.class)); | ||
| gc.deduplicateSubmit(gcIntervalSec, TimeUnit.SECONDS); | ||
|
|
||
| logger.info(String.format("[MetadataExpunge] submitted GC job [%s] for vm[uuid:%s] on ps[uuid:%s]", gc.NAME, vmUuid, psUuid)); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`requireHost` 路径还缺少“host 仍能访问该主存储”的校验。当前唯一返回 `requireHostForCleanup() == true` 的实现是 LocalStorage，而 Line 69 这里只判断 `HostVO` 是否存在。`LocalStorageBase` 后续还会在 backend 解析时校验该 host 是否仍绑定这块 PS（Line 3163-3167、Line 3577-3583）；如果 host 还在库里但已经从这块 PS 脱离，这个 GC 会持续发送注定失败的 cleanup 并一直重试。这里最好通过扩展点或 PS helper 在发送前把“host 是否仍映射到目标 PS”也判掉，不满足时直接 `cancel`。
🤖 Prompt for AI Agents