Skip to content

Commit 2e57288

Browse files
committed
<feature>[vm]: add MetadataImpact
APIImpact Resolves: ZSV-11559 Change-Id: I6b6a6378627264646d6a76726762736e77787373
1 parent b7f926d commit 2e57288

151 files changed

Lines changed: 4946 additions & 13 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
package org.zstack.compute.vm;
2+
3+
import org.springframework.beans.factory.annotation.Autowired;
4+
import org.zstack.core.cloudbus.CloudBusCallBack;
5+
import org.zstack.core.componentloader.PluginRegistry;
6+
import org.zstack.core.db.Q;
7+
import org.zstack.core.gc.GC;
8+
import org.zstack.core.gc.GCCompletion;
9+
import org.zstack.core.gc.TimeBasedGarbageCollector;
10+
import org.zstack.header.host.HostVO;
11+
import org.zstack.header.message.MessageReply;
12+
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
13+
import org.zstack.header.storage.primary.PrimaryStorageAO_;
14+
import org.zstack.header.storage.primary.PrimaryStorageConstant;
15+
import org.zstack.header.storage.primary.PrimaryStorageVO;
16+
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
17+
import org.zstack.utils.Utils;
18+
import org.zstack.utils.logging.CLogger;
19+
20+
public class CleanupVmInstanceMetadataOnPrimaryStorageGC extends TimeBasedGarbageCollector {
21+
private static final CLogger logger = Utils.getLogger(CleanupVmInstanceMetadataOnPrimaryStorageGC.class);
22+
23+
@Autowired
24+
private PluginRegistry pluginRgty;
25+
26+
@GC
27+
public String primaryStorageUuid;
28+
@GC
29+
public String vmUuid;
30+
@GC
31+
public String rootVolumeUuid;
32+
@GC
33+
public String metadataPath;
34+
@GC
35+
public String hostUuid;
36+
37+
public static String getGCName(String vmUuid, String primaryStorageUuid) {
38+
return String.format("gc-cleanup-vm-metadata-%s-%s", vmUuid, primaryStorageUuid);
39+
}
40+
41+
@Override
42+
protected void triggerNow(GCCompletion completion) {
43+
if (!dbf.isExist(primaryStorageUuid, PrimaryStorageVO.class)) {
44+
logger.debug(String.format("[MetadataCleanupGC] primary storage[uuid:%s] no longer exists, " +
45+
"cancel gc for vm[uuid:%s]", primaryStorageUuid, vmUuid));
46+
completion.cancel();
47+
return;
48+
}
49+
50+
String psType = Q.New(PrimaryStorageVO.class)
51+
.select(PrimaryStorageAO_.type)
52+
.eq(PrimaryStorageAO_.uuid, primaryStorageUuid)
53+
.findValue();
54+
55+
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
56+
boolean requireHost = ext != null && ext.requireHostForCleanup();
57+
58+
if (hostUuid == null && requireHost) {
59+
logger.debug(String.format("[MetadataCleanupGC] hostUuid is null and ps[uuid:%s, type:%s] " +
60+
"requires host for cleanup, cancel gc for vm[uuid:%s]",
61+
primaryStorageUuid, psType, vmUuid));
62+
completion.cancel();
63+
return;
64+
}
65+
66+
if (hostUuid != null && !dbf.isExist(hostUuid, HostVO.class)) {
67+
if (requireHost) {
68+
logger.debug(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists " +
69+
"and ps[uuid:%s, type:%s] requires host for cleanup, cancel gc for vm[uuid:%s]",
70+
hostUuid, primaryStorageUuid, psType, vmUuid));
71+
completion.cancel();
72+
return;
73+
}
74+
75+
logger.info(String.format("[MetadataCleanupGC] host[uuid:%s] no longer exists for vm[uuid:%s], " +
76+
"clear hostUuid and let the primary storage backend pick an available host", hostUuid, vmUuid));
77+
hostUuid = null;
78+
}
79+
80+
CleanupVmInstanceMetadataOnPrimaryStorageMsg msg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
81+
msg.setPrimaryStorageUuid(primaryStorageUuid);
82+
msg.setVmInstanceUuid(vmUuid);
83+
msg.setRootVolumeUuid(rootVolumeUuid);
84+
msg.setMetadataPath(metadataPath);
85+
msg.setHostUuid(hostUuid);
86+
87+
bus.makeTargetServiceIdByResourceUuid(msg, PrimaryStorageConstant.SERVICE_ID, primaryStorageUuid);
88+
bus.send(msg, new CloudBusCallBack(completion) {
89+
@Override
90+
public void run(MessageReply reply) {
91+
if (reply.isSuccess()) {
92+
logger.info(String.format("[MetadataCleanupGC] successfully cleaned up metadata " +
93+
"for vm[uuid:%s] on ps[uuid:%s]", vmUuid, primaryStorageUuid));
94+
completion.success();
95+
} else {
96+
logger.warn(String.format("[MetadataCleanupGC] failed to clean up metadata " +
97+
"for vm[uuid:%s] on ps[uuid:%s]: %s", vmUuid, primaryStorageUuid, reply.getError()));
98+
completion.fail(reply.getError());
99+
}
100+
}
101+
});
102+
}
103+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
package org.zstack.compute.vm;
2+
3+
import org.springframework.beans.factory.annotation.Autowire;
4+
import org.springframework.beans.factory.annotation.Autowired;
5+
import org.springframework.beans.factory.annotation.Configurable;
6+
import org.zstack.core.cloudbus.CloudBus;
7+
import org.zstack.core.cloudbus.CloudBusCallBack;
8+
import org.zstack.core.componentloader.PluginRegistry;
9+
import org.zstack.core.db.Q;
10+
import org.zstack.header.core.workflow.FlowTrigger;
11+
import org.zstack.header.core.workflow.NoRollbackFlow;
12+
import org.zstack.header.message.MessageReply;
13+
import org.zstack.header.storage.primary.CleanupVmInstanceMetadataOnPrimaryStorageMsg;
14+
import org.zstack.header.storage.primary.PrimaryStorageConstant;
15+
import org.zstack.header.storage.primary.PrimaryStorageVO;
16+
import org.zstack.header.storage.primary.PrimaryStorageVO_;
17+
import org.zstack.header.vm.VmInstanceConstant;
18+
import org.zstack.header.vm.VmInstanceSpec;
19+
import org.zstack.header.vm.metadata.VmMetadataPathBuildExtensionPoint;
20+
import org.zstack.header.volume.VolumeInventory;
21+
import org.zstack.utils.Utils;
22+
import org.zstack.utils.logging.CLogger;
23+
24+
import java.util.Map;
25+
import java.util.concurrent.TimeUnit;
26+
27+
@Configurable(preConstruction = true, autowire = Autowire.BY_TYPE)
28+
public class VmExpungeMetadataFlow extends NoRollbackFlow {
29+
private static final CLogger logger = Utils.getLogger(VmExpungeMetadataFlow.class);
30+
31+
@Autowired
32+
private CloudBus bus;
33+
@Autowired
34+
private PluginRegistry pluginRgty;
35+
36+
@Override
37+
public void run(FlowTrigger trigger, Map data) {
38+
final VmInstanceSpec spec = (VmInstanceSpec) data.get(VmInstanceConstant.Params.VmInstanceSpec.toString());
39+
if (spec == null || spec.getVmInventory() == null) {
40+
logger.warn("[MetadataExpunge] missing VmInstanceSpec or VmInventory, skip metadata cleanup");
41+
trigger.next();
42+
return;
43+
}
44+
45+
final String vmUuid = spec.getVmInventory().getUuid();
46+
47+
VolumeInventory rootVolume = spec.getVmInventory().getRootVolume();
48+
String psUuid = rootVolume != null ? rootVolume.getPrimaryStorageUuid() : null;
49+
if (psUuid == null) {
50+
logger.debug(String.format("[MetadataExpunge] vm[uuid:%s] root volume has no primaryStorageUuid, " +
51+
"skipping metadata cleanup", vmUuid));
52+
trigger.next();
53+
return;
54+
}
55+
56+
57+
String psType = Q.New(PrimaryStorageVO.class).select(PrimaryStorageVO_.type).eq(PrimaryStorageVO_.uuid, psUuid).findValue();
58+
if (psType == null) {
59+
logger.warn(String.format("[MetadataExpunge] primary storage[uuid:%s] not found for vm[uuid:%s], " +
60+
"skip metadata cleanup", psUuid, vmUuid));
61+
trigger.next();
62+
return;
63+
}
64+
65+
VmMetadataPathBuildExtensionPoint ext = pluginRgty.getExtensionFromMap(psType, VmMetadataPathBuildExtensionPoint.class);
66+
if (ext == null) {
67+
logger.warn(String.format("[MetadataExpunge] no VmMetadataPathBuildExtensionPoint found for ps[uuid:%s, type:%s], " +
68+
"skip metadata cleanup", psUuid, psType));
69+
trigger.next();
70+
return;
71+
}
72+
final String metadataPath;
73+
try {
74+
metadataPath = ext.buildVmMetadataPath(psUuid, vmUuid);
75+
} catch (Exception e) {
76+
logger.warn(String.format("[MetadataExpunge] failed to build metadata path for vm[uuid:%s] on ps[uuid:%s], " +
77+
"skip metadata cleanup: %s", vmUuid, psUuid, e.getMessage()));
78+
trigger.next();
79+
return;
80+
}
81+
82+
String hostUuid = spec.getVmInventory().getHostUuid();
83+
if (hostUuid == null) {
84+
hostUuid = spec.getVmInventory().getLastHostUuid();
85+
}
86+
87+
if (hostUuid == null && ext.requireHostForCleanup()) {
88+
logger.warn(String.format("[MetadataExpunge] vm[uuid:%s] hostUuid is null, " +
89+
"ps[uuid:%s, type:%s] requires host for cleanup, skip without submitting GC",
90+
vmUuid, psUuid, psType));
91+
trigger.next();
92+
return;
93+
}
94+
95+
String rootVolumeUuid = rootVolume.getUuid();
96+
CleanupVmInstanceMetadataOnPrimaryStorageMsg cmsg = new CleanupVmInstanceMetadataOnPrimaryStorageMsg();
97+
cmsg.setPrimaryStorageUuid(psUuid);
98+
cmsg.setVmInstanceUuid(vmUuid);
99+
cmsg.setMetadataPath(metadataPath);
100+
cmsg.setRootVolumeUuid(rootVolumeUuid);
101+
cmsg.setHostUuid(hostUuid);
102+
final String finalPsUuid = psUuid;
103+
final String finalHostUuid = hostUuid;
104+
105+
bus.makeTargetServiceIdByResourceUuid(cmsg, PrimaryStorageConstant.SERVICE_ID, psUuid);
106+
bus.send(cmsg, new CloudBusCallBack(trigger) {
107+
@Override
108+
public void run(MessageReply reply) {
109+
if (reply.isSuccess()) {
110+
logger.info(String.format("[MetadataExpunge] successfully deleted metadata for vm[uuid:%s] on ps[uuid:%s]",
111+
vmUuid, finalPsUuid));
112+
} else {
113+
logger.warn(String.format("[MetadataExpunge] failed to delete metadata for vm[uuid:%s] on ps[uuid:%s]: %s, " +
114+
"submitting GC job for retry", vmUuid, finalPsUuid, reply.getError()));
115+
submitGC(finalPsUuid, vmUuid, rootVolumeUuid, metadataPath, finalHostUuid);
116+
}
117+
trigger.next();
118+
}
119+
});
120+
}
121+
122+
private void submitGC(String psUuid, String vmUuid, String rootVolumeUuid, String metadataPath, String hostUuid) {
123+
CleanupVmInstanceMetadataOnPrimaryStorageGC gc = new CleanupVmInstanceMetadataOnPrimaryStorageGC();
124+
gc.NAME = CleanupVmInstanceMetadataOnPrimaryStorageGC.getGCName(vmUuid, psUuid);
125+
gc.primaryStorageUuid = psUuid;
126+
gc.vmUuid = vmUuid;
127+
gc.rootVolumeUuid = rootVolumeUuid;
128+
gc.metadataPath = metadataPath;
129+
gc.hostUuid = hostUuid;
130+
long gcIntervalSec = TimeUnit.HOURS.toSeconds(VmGlobalConfig.VM_METADATA_CLEANUP_GC_INTERVAL.value(Long.class));
131+
gc.deduplicateSubmit(gcIntervalSec, TimeUnit.SECONDS);
132+
133+
logger.info(String.format("[MetadataExpunge] submitted GC job [%s] for vm[uuid:%s] on ps[uuid:%s]", gc.NAME, vmUuid, psUuid));
134+
}
135+
}

compute/src/main/java/org/zstack/compute/vm/VmGlobalConfig.java

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,55 @@ public class VmGlobalConfig {
139139
@GlobalConfigValidation(validValues = {"true", "false"})
140140
@BindResourceConfig(value = {VmInstanceVO.class, ClusterVO.class})
141141
public static GlobalConfig RESET_TPM_AFTER_VM_CLONE = new GlobalConfig(CATEGORY, "reset.tpm.after.vm.clone");
142+
143+
@GlobalConfigValidation(validValues = {"true", "false"})
144+
public static GlobalConfig VM_METADATA_ENABLED = new GlobalConfig(CATEGORY, "vm.metadata.enabled");
145+
146+
@GlobalConfigValidation()
147+
public static GlobalConfig VM_METADATA_LAST_REFRESH_VERSION = new GlobalConfig(CATEGORY, "vm.metadata.lastRefreshVersion");
148+
149+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 100)
150+
public static GlobalConfig VM_METADATA_FLUSH_CONCURRENCY = new GlobalConfig(CATEGORY, "vm.metadata.flush.concurrency");
151+
152+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 300)
153+
public static GlobalConfig VM_METADATA_FLUSH_POLL_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.flush.pollInterval");
154+
155+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
156+
public static GlobalConfig VM_METADATA_FLUSH_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.flush.batchSize");
157+
158+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 168)
159+
public static GlobalConfig VM_METADATA_CLEANUP_GC_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.cleanup.gc.interval");
160+
161+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 100)
162+
public static GlobalConfig VM_METADATA_FLUSH_MAX_RETRY = new GlobalConfig(CATEGORY, "vm.metadata.flush.maxRetry");
163+
164+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 120)
165+
public static GlobalConfig VM_METADATA_FLUSH_ZOMBIE_CLAIM_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.flush.zombieClaimThreshold");
166+
167+
// 6h = 21600s, 48h = 172800s
168+
@GlobalConfigValidation(numberGreaterThan = 21599, numberLessThan = 172801)
169+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftInterval");
170+
171+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
172+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryInterval");
173+
174+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
175+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_MAX_CYCLES = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryMaxCycles");
176+
177+
@GlobalConfigValidation(numberGreaterThan = 0)
178+
public static GlobalConfig VM_METADATA_PAYLOAD_REJECT_THRESHOLD = new GlobalConfig(CATEGORY, "vm.metadata.payload.rejectThreshold");
179+
180+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 86400)
181+
public static GlobalConfig VM_METADATA_MAINTENANCE_ORPHAN_CHECK_INTERVAL = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.orphanCheckInterval");
182+
183+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 1000)
184+
public static GlobalConfig VM_METADATA_MAINTENANCE_STALE_RECOVERY_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.staleRecoveryBatchSize");
185+
186+
// Range: 10-30
187+
@GlobalConfigValidation(numberGreaterThan = 9, numberLessThan = 31)
188+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_BATCH_SIZE = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftBatchSize");
189+
190+
// Range: 1-10 seconds
191+
@GlobalConfigValidation(numberGreaterThan = 0, numberLessThan = 11)
192+
public static GlobalConfig VM_METADATA_MAINTENANCE_CONTENT_DRIFT_BATCH_SLEEP_SEC = new GlobalConfig(CATEGORY, "vm.metadata.maintenance.contentDriftBatchSleepSec");
142193
}

0 commit comments

Comments
 (0)