Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions pkg/drbd/drbdadm.go
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,159 @@ func (a *Adm) AnyConnectedPeerHasDataForVolume(ctx context.Context, resource str
return false
}

// SafeForMkfsRetryPromote runs `drbdsetup status <res> --json` and
// answers a single question: may this node perform a
// promote→mkfs→demote retry at this very moment, without the
// dispatcher's auto-primary blessing? This is the BUG-028 latch-free
// mkfs retry; the satellite's latchFreeMkfsRetryAllowed documents the
// false RD.Spec.Initialized latch that kills the auto-primary election.
//
// The answer is true only if every one of the following holds:
//
//   - The local role is not Primary. We are about to promote, so a slot
//     that is already Primary means a consumer or an earlier dance
//     still holds the device — let it finish.
//   - Every local volume is diskful UpToDate. The retry adds a missing
//     filesystem to a healthy, converged replica set; it must never
//     promote an Inconsistent local copy.
//   - No peer is Primary. An external promoter (drbd-reactor's RWX
//     mount loop) may hold the device briefly; the caller retries on a
//     later reconcile once that peer has demoted again.
//   - Every peer-device reports UpToDate or Diskless (an intentional
//     witness). UpToDate-while-Connected means the peer is in the same
//     data generation as the local volume (bit-identical), so
//     `primary --force` mints nothing unrelated and the following mkfs
//     writes replicate to copies that already equal ours. Any other
//     peer-disk state (Inconsistent, the DUnknown of a disconnected
//     peer, Negotiating, …) is a veto: a disconnected diskful peer
//     could be an offline data holder, and forcing primary against one
//     is exactly the Bug 342 unrelated-data wedge.
//
// Probe and parse failures are treated conservatively: the result is
// false and the retry simply waits for the next reconcile.
func (a *Adm) SafeForMkfsRetryPromote(ctx context.Context, resource string) bool {
	raw, err := a.exec.Run(ctx, "drbdsetup", "status", resource, "--json")
	if err != nil {
		return false
	}

	var parsed drbdsetupStatusRoot

	err = json.Unmarshal(raw, &parsed)
	if err != nil {
		return false
	}

	if len(parsed) == 0 {
		return false
	}

	local := parsed[0]

	// Local preconditions: not already Primary, and every volume
	// diskful UpToDate.
	if Role(local.Role).IsPrimary() || !localIsUpToDate(local.Devices) {
		return false
	}

	for _, peer := range local.Connections {
		if Role(peer.PeerRole).IsPrimary() {
			return false
		}

		for _, dev := range peer.PeerDevices {
			state := DiskState(dev.PeerDiskState)

			// Only a lock-step sibling (UpToDate) or an intentional
			// witness (Diskless) is acceptable. Every other token —
			// Consistent, Outdated, Attaching, Detaching, Failed,
			// Negotiating, Inconsistent, DUnknown, or an unknown/empty
			// value — refuses.
			if state != DiskStateUpToDate && state != DiskStateDiskless {
				return false
			}
		}
	}

	return true
}

// Day0SiblingSetConnected runs `drbdsetup status <res> --json` and
// tells the caller whether the whole configured replica set is, right
// now, visible to the kernel as a promote-safe day0 candidate set. It
// serves the BUG-028 first-activation mkfs bypass and supplies only
// the connectivity/coverage half; the GI-level day0 proof belongs to
// the satellite.
//
// All of the following must hold for a true result:
//
//   - The local role is not Primary and every local volume is diskful
//     UpToDate (the elected winner seeded UpToDate via set-gi).
//   - No peer is Primary. An external promoter mid-grab defers the
//     bypass to the latch-free retry, which handles foreign Primaries.
//   - Every connected peer-device is UpToDate or Diskless.
//   - A peer-device still in DUnknown (the connection has not
//     handshaken) is accepted only when that peer's name is present in
//     disklessPeers: an intentional diskless witness carries no data
//     by construction. An un-handshaken diskful peer refuses, because
//     it could be an offline data holder and both `primary --force`
//     and mkfs against it are the Bug 342 unrelated-data / data-loss
//     wedge.
//
// Rationale: the dispatcher's CRD-level PeerHasData treats an UpToDate
// sibling whose CurrentGI has not been observed yet (the get-gi
// backfill is best-effort) as data-bearing. On a fresh day0 race that
// conservatism is false and would permanently cost the one-shot
// first-activation mkfs. The kernel coverage checked here, together
// with the satellite's local-GI==day0 proof (a Connected+UpToDate peer
// necessarily shares the local data generation), strictly supersedes
// the CRD signal: every case PeerHasData correctly protects is refused
// here as well (a real connected data peer forces local GI != day0; a
// disconnected diskful peer is DUnknown).
//
// Probe and parse failures are treated conservatively: the result is
// false.
func (a *Adm) Day0SiblingSetConnected(ctx context.Context, resource string, disklessPeers map[string]bool) bool {
	raw, err := a.exec.Run(ctx, "drbdsetup", "status", resource, "--json")
	if err != nil {
		return false
	}

	var parsed drbdsetupStatusRoot

	err = json.Unmarshal(raw, &parsed)
	if err != nil {
		return false
	}

	if len(parsed) == 0 {
		return false
	}

	local := parsed[0]

	if Role(local.Role).IsPrimary() {
		return false
	}

	if !localIsUpToDate(local.Devices) {
		return false
	}

	for _, peer := range local.Connections {
		if Role(peer.PeerRole).IsPrimary() {
			return false
		}

		for _, dev := range peer.PeerDevices {
			state := DiskState(dev.PeerDiskState)

			switch {
			case state == DiskStateUpToDate || state == DiskStateDiskless:
				// Lock-step sibling or intentional witness — safe.
				continue
			case state == DiskStateDUnknown && disklessPeers[peer.PeerName]:
				// Not handshaken yet, but a declared diskless witness.
				continue
			default:
				// DUnknown on a diskful peer, any other known state
				// (Consistent, Outdated, Attaching, Detaching, Failed,
				// Negotiating, Inconsistent), or an unknown/empty
				// token — refuse, conservative.
				return false
			}
		}
	}

	return true
}

// NeedsRecoveryPromote probes the live kernel via `drbdsetup status
// <res> --json` and reports whether THIS node should re-arm the
// auto-primary seed to unstick a fresh RD whose initial sync wedged
Expand Down
Loading