Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 155 additions & 88 deletions lib/api/apiUtils/object/getReplicationInfo.js
Original file line number Diff line number Diff line change
@@ -1,73 +1,101 @@
const { isServiceAccount, getServiceAccountProperties } =
require('../authorization/permissionChecks');
const { isServiceAccount, getServiceAccountProperties } = require('../authorization/permissionChecks');
const { replicationBackends } = require('arsenal').constants;

function _getBackend(objectMD, site) {
const backends = objectMD ? objectMD.replicationInfo.backends : [];
const backend = backends.find(o => o.site === site);
// If the backend already exists, just update the status.
if (backend) {
return Object.assign({}, backend, { status: 'PENDING' });
}
/**
* Build a backend entry for replication. If the object already has a
* backend for the given site, clone it and reset its status to PENDING.
* Otherwise create a new entry. For CRR backends, the resolved
* destination bucket ARN and role are stamped on the entry; cloud
* backends carry only `site`, `status`, `dataStoreVersionId`. The
* location type is not stored - it is resolved from the location
* configuration via `site` whenever needed.
* @param {object} [objectMD] - existing object metadata
* @param {string} site - destination site name
* @param {object} [opts] - optional per-backend fields (CRR only)
* @param {string} [opts.destination] - destination bucket ARN
* @param {string} [opts.role] - destination role ARN
* @return {object} backend entry
*/
function _getBackend(objectMD, site, { destination, role } = {}) {
const existing = objectMD?.replicationInfo?.backends?.find(b => b.site === site);

const base = existing
? { ...existing, status: 'PENDING' }
: { site, status: 'PENDING', dataStoreVersionId: '' };

return {
site,
status: 'PENDING',
dataStoreVersionId: '',
...base,
...(destination && { destination }),
...(role && { role }),
};
}

function _getStorageClasses(s3config, rule) {
/**
* Resolve the storage class for a replication rule. Uses the rule's
* explicit storageClass if set, otherwise falls back to the default
* replication endpoint.
* @param {object} s3config - server configuration
* @param {object} rule - replication rule
* @return {string|undefined} storage class name, or undefined if none
*/
function _getStorageClass(s3config, rule) {
if (rule.storageClass) {
return rule.storageClass.split(',');
return rule.storageClass;
}
const { replicationEndpoints } = s3config;
// If no storage class, use the given default endpoint or the sole endpoint
if (replicationEndpoints.length > 0) {
const endPoint =
replicationEndpoints.find(endpoint => endpoint.default) || replicationEndpoints[0];
return [endPoint.site];
const { replicationEndpoints = [] } = s3config;
if (replicationEndpoints.length === 0) {
return undefined;
}
return undefined;
const endPoint = replicationEndpoints.find(e => e.default) ?? replicationEndpoints[0];
return endPoint?.site;
}

function _getReplicationInfo(s3config, rule, replicationConfig, content, operationType,
objectMD, bucketMD) {
const storageTypes = [];
const backends = [];
const storageClasses = _getStorageClasses(s3config, rule);
if (!storageClasses) {
return undefined;
}
storageClasses.forEach(storageClass => {
const storageClassName =
storageClass.endsWith(':preferred_read') ?
storageClass.split(':')[0] : storageClass;
// TODO CLDSRV-646: for consistency, should we look at replicationEndpoints instead, like
// `_getStorageClasses()` ?
const location = s3config.locationConstraints[storageClassName];
if (location && replicationBackends[location.type]) {
storageTypes.push(location.type);
}
backends.push(_getBackend(objectMD, storageClassName));
});
if (storageTypes.length > 0 && operationType) {
content.push(operationType);
/**
* Split the top-level Role field into source and destination role
* ARNs. The field is either a single ARN (used as a template for both
* sides) or a comma-separated pair "source,destination".
*/
function _splitRole(role) {
if (!role) {
return { source: undefined, destination: undefined };
}
const parts = role.split(',');
return {
status: 'PENDING',
backends,
content,
destination: replicationConfig.destination,
storageClass: storageClasses.join(','),
role: replicationConfig.role,
storageType: storageTypes.join(','),
isNFS: bucketMD.isNFS(),
source: parts[0],
destination: parts[1] ?? parts[0],
};
}

/**
* Check whether the authenticated user is allowed to trigger replication.
* Internal service accounts (e.g. Lifecycle) are not allowed unless their
* account properties explicitly permit it (e.g. MD ingestion).
* @param {AuthInfo} [authInfo] - authentication info of the request issuer
* @return {boolean} true if the user can trigger replication
*/
function _canUserReplicate(authInfo) {
if (!authInfo) {
return true;
}
const canonicalId = authInfo.getCanonicalID();
if (!isServiceAccount(canonicalId)) {
return true;
}
const props = getServiceAccountProperties(canonicalId);
return !!props?.canReplicate;
}

/**
* Get the object replicationInfo to replicate data and metadata, or only
* metadata if the operation only changes metadata or the object is 0 bytes
* metadata if the operation only changes metadata or the object is 0 bytes.
*
* Matches all enabled rules whose prefix matches the object key. When
* multiple matching rules target the same destination site, only the
* highest-priority rule applies (rules with no priority are treated as
* lowest). Each backend stamps per-backend `destination` and `role`
* fields for CRR locations and a `storageType` field for cloud
* locations.
*
* @param {object} s3config - Cloudserver configuration object
* @param {object} s3config.locationConstraints - Configured map of location constraints
* @param {object[]} s3config.replicationEndpoints - Configured replication endpoints
Expand All @@ -78,49 +106,88 @@ function _getReplicationInfo(s3config, rule, replicationConfig, content, operati
* @param {string} operationType - The type of operation to replicate
* @param {object} objectMD - The object metadata
* @param {AuthInfo} [authInfo] - authentication info of object owner
* @return {undefined}
* @return {object|undefined}
*/
function getReplicationInfo(
s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) {
const content = isMD || objSize === 0 ? ['METADATA'] : ['DATA', 'METADATA'];
function getReplicationInfo(s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) {
const config = bucketMD.getReplicationConfiguration();
if (!config || !_canUserReplicate(authInfo)) {
return undefined;
}

// Do not replicate object in the following cases:
//
// - bucket does not have a replication configuration
//
// - replication configuration does not apply to the object
// (i.e. no rule matches object prefix)
//
// - replication configuration applies to the object (i.e. a rule matches
// object prefix) but the status is disabled
//
// - object owner is an internal service account like Lifecycle,
// unless the account properties explicitly allow it to
// replicate like MD ingestion (because we do not want to
// replicate objects created from actions triggered by internal
// services, by design)

if (config) {
let doReplicate = false;
if (!authInfo || !isServiceAccount(authInfo.getCanonicalID())) {
doReplicate = true;
} else {
const serviceAccountProps = getServiceAccountProperties(
authInfo.getCanonicalID());
doReplicate = serviceAccountProps.canReplicate;
const activeRules = config.rules.filter(r => r.enabled && objKey.startsWith(r.prefix));
if (activeRules.length === 0) {
return undefined;
}

// Expand each rule's storageClass (which may be a comma-separated
// list in the legacy multi-destination form) into one (site, rule)
// pair per destination site.
const items = [];
for (const rule of activeRules) {
const storageClassString = _getStorageClass(s3config, rule);
if (!storageClassString) {
continue;
}
for (const raw of storageClassString.split(',')) {
// Strip the optional `:preferred_read` suffix.
const site = raw.split(':')[0];
items.push({ site, rule });
}
}
if (items.length === 0) {
return undefined;
}

// Dedup by site: when multiple rules target the same site, the
// highest-priority rule wins. Rules without a priority are treated
// as the lowest priority; ties keep the first occurrence.
const bySite = new Map();
for (const item of items) {
const cur = bySite.get(item.site);
const newP = item.rule.priority ?? -Infinity;
const curP = cur?.rule.priority ?? -Infinity;
if (!cur || newP > curP) {
bySite.set(item.site, item);
}
if (doReplicate) {
const rule = config.rules.find(
rule => (objKey.startsWith(rule.prefix) && rule.enabled));
if (rule) {
// TODO CLDSRV-646 : should "merge" the replicationInfo for different rules
return _getReplicationInfo(
s3config, rule, config, content, operationType, objectMD, bucketMD);
}

const { destination: destRoleTemplate } = _splitRole(config.role);
let hasCloudBackend = false;

const backends = [];
for (const [site, { rule }] of bySite) {
const isCloud = !!replicationBackends[s3config.locationConstraints[site]?.type];

const opts = {};
if (isCloud) {
hasCloudBackend = true;
} else {
// CRR backend: stamp resolved destination bucket and role
// so backbeat can perform the replication deterministically.
const destination = rule.destination ?? config.destination;
const role = rule.role ?? destRoleTemplate;
if (destination) {
opts.destination = destination;
}
if (role) {
opts.role = role;
}
}
backends.push(_getBackend(objectMD, site, opts));
}
return undefined;

const content = (isMD || objSize === 0) ? ['METADATA'] : ['DATA', 'METADATA'];
if (hasCloudBackend && operationType) {
content.push(operationType);
}

return {
status: 'PENDING',
backends,
content,
role: _splitRole(config.role).source,
isNFS: bucketMD.isNFS(),
};
}

module.exports = getReplicationInfo;
28 changes: 21 additions & 7 deletions lib/metadata/acl.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,31 @@ const acl = {
objectMD.acl = addACLParams;
objectMD.originOp = 's3:ObjectAcl:Put';

// Use storageType to determine if replication update is needed, as it is set only for
// "cloud" locations. This ensures that we reset replication when CRR is used, but not
// when multi-backend replication (i.e. Zenko) is used.
// TODO: this should be refactored to properly update the replication info, accounting
// for multiple rules and resetting the status only if needed CLDSRV-646
// Rebuild replication info from the current bucket config to
// pick up any new destinations. CRR backends carry a
// resolved destination role on the entry (backbeat needs it
// to authenticate on the destination side); cloud backends
// don't (credentials live in the location configuration).
// For cloud backends, ACL replication is not supported, so
// preserve their existing status instead of resetting to
// PENDING.
const isCRR = b => !!b.role;

const replicationInfo = getReplicationInfo(config, objectKey, bucket, true);
if (replicationInfo && !replicationInfo.storageType) {
if (replicationInfo && replicationInfo.backends.some(isCRR)) {
const backends = replicationInfo.backends.map(b => {
if (isCRR(b)) {
return b;
}

const existing = objectMD.replicationInfo.backends.find(e => e.site === b.site);
return existing || b;
});

objectMD.replicationInfo = {
...objectMD.replicationInfo,
...replicationInfo,
backends,
};
}

Expand Down Expand Up @@ -171,4 +186,3 @@ const acl = {
};

module.exports = acl;

Loading
Loading