diff --git a/lib/api/apiUtils/object/getReplicationInfo.js b/lib/api/apiUtils/object/getReplicationInfo.js index b7e4afb25d..d54bc39ef4 100644 --- a/lib/api/apiUtils/object/getReplicationInfo.js +++ b/lib/api/apiUtils/object/getReplicationInfo.js @@ -1,73 +1,101 @@ -const { isServiceAccount, getServiceAccountProperties } = - require('../authorization/permissionChecks'); +const { isServiceAccount, getServiceAccountProperties } = require('../authorization/permissionChecks'); const { replicationBackends } = require('arsenal').constants; -function _getBackend(objectMD, site) { - const backends = objectMD ? objectMD.replicationInfo.backends : []; - const backend = backends.find(o => o.site === site); - // If the backend already exists, just update the status. - if (backend) { - return Object.assign({}, backend, { status: 'PENDING' }); - } +/** + * Build a backend entry for replication. If the object already has a + * backend for the given site, clone it and reset its status to PENDING. + * Otherwise create a new entry. For CRR backends, the resolved + * destination bucket ARN and role are stamped on the entry; cloud + * backends carry only `site`, `status`, `dataStoreVersionId`. The + * location type is not stored - it is resolved from the location + * configuration via `site` whenever needed. + * @param {object} [objectMD] - existing object metadata + * @param {string} site - destination site name + * @param {object} [opts] - optional per-backend fields (CRR only) + * @param {string} [opts.destination] - destination bucket ARN + * @param {string} [opts.role] - destination role ARN + * @return {object} backend entry + */ +function _getBackend(objectMD, site, { destination, role } = {}) { + const existing = objectMD?.replicationInfo?.backends?.find(b => b.site === site); + + const base = existing + ? 
{ ...existing, status: 'PENDING' } + : { site, status: 'PENDING', dataStoreVersionId: '' }; + return { - site, - status: 'PENDING', - dataStoreVersionId: '', + ...base, + ...(destination && { destination }), + ...(role && { role }), }; } -function _getStorageClasses(s3config, rule) { +/** + * Resolve the storage class for a replication rule. Uses the rule's + * explicit storageClass if set, otherwise falls back to the default + * replication endpoint. + * @param {object} s3config - server configuration + * @param {object} rule - replication rule + * @return {string|undefined} storage class name, or undefined if none + */ +function _getStorageClass(s3config, rule) { if (rule.storageClass) { - return rule.storageClass.split(','); + return rule.storageClass; } - const { replicationEndpoints } = s3config; - // If no storage class, use the given default endpoint or the sole endpoint - if (replicationEndpoints.length > 0) { - const endPoint = - replicationEndpoints.find(endpoint => endpoint.default) || replicationEndpoints[0]; - return [endPoint.site]; + const { replicationEndpoints = [] } = s3config; + if (replicationEndpoints.length === 0) { + return undefined; } - return undefined; + const endPoint = replicationEndpoints.find(e => e.default) ?? replicationEndpoints[0]; + return endPoint?.site; } -function _getReplicationInfo(s3config, rule, replicationConfig, content, operationType, - objectMD, bucketMD) { - const storageTypes = []; - const backends = []; - const storageClasses = _getStorageClasses(s3config, rule); - if (!storageClasses) { - return undefined; - } - storageClasses.forEach(storageClass => { - const storageClassName = - storageClass.endsWith(':preferred_read') ? - storageClass.split(':')[0] : storageClass; - // TODO CLDSRV-646: for consistency, should we look at replicationEndpoints instead, like - // `_getStorageClasses()` ? 
- const location = s3config.locationConstraints[storageClassName]; - if (location && replicationBackends[location.type]) { - storageTypes.push(location.type); - } - backends.push(_getBackend(objectMD, storageClassName)); - }); - if (storageTypes.length > 0 && operationType) { - content.push(operationType); +/** + * Split the top-level Role field into source and destination role + * ARNs. The field is either a single ARN (used as a template for both + * sides) or a comma-separated pair "source,destination". + */ +function _splitRole(role) { + if (!role) { + return { source: undefined, destination: undefined }; } + const parts = role.split(','); return { - status: 'PENDING', - backends, - content, - destination: replicationConfig.destination, - storageClass: storageClasses.join(','), - role: replicationConfig.role, - storageType: storageTypes.join(','), - isNFS: bucketMD.isNFS(), + source: parts[0], + destination: parts[1] ?? parts[0], }; } +/** + * Check whether the authenticated user is allowed to trigger replication. + * Internal service accounts (e.g. Lifecycle) are not allowed unless their + * account properties explicitly permit it (e.g. MD ingestion). + * @param {AuthInfo} [authInfo] - authentication info of the request issuer + * @return {boolean} true if the user can trigger replication + */ +function _canUserReplicate(authInfo) { + if (!authInfo) { + return true; + } + const canonicalId = authInfo.getCanonicalID(); + if (!isServiceAccount(canonicalId)) { + return true; + } + const props = getServiceAccountProperties(canonicalId); + return !!props?.canReplicate; +} + /** * Get the object replicationInfo to replicate data and metadata, or only - * metadata if the operation only changes metadata or the object is 0 bytes + * metadata if the operation only changes metadata or the object is 0 bytes. + * + * Matches all enabled rules whose prefix matches the object key. 
When + * multiple matching rules target the same destination site, only the + * highest-priority rule applies (rules with no priority are treated as + * lowest). CRR backends are stamped with per-backend `destination` and + * `role` fields; cloud backends carry no extra fields (the location + * type is resolved from the location configuration via `site`). + * + * @param {object} s3config - Cloudserver configuration object * @param {object} s3config.locationConstraints - Configured map of location constraints * @param {object[]} s3config.replicationEndpoints - Configured replication endpoints @@ -78,49 +106,88 @@ function _getReplicationInfo(s3config, rule, replicationConfig, content, operati * @param {string} operationType - The type of operation to replicate * @param {object} objectMD - The object metadata * @param {AuthInfo} [authInfo] - authentication info of object owner - * @return {undefined} + * @return {object|undefined} */ -function getReplicationInfo( - s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) { - const content = isMD || objSize === 0 ? ['METADATA'] : ['DATA', 'METADATA']; +function getReplicationInfo(s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) { const config = bucketMD.getReplicationConfiguration(); + if (!config || !_canUserReplicate(authInfo)) { + return undefined; + } - // Do not replicate object in the following cases: - // - // - bucket does not have a replication configuration - // - // - replication configuration does not apply to the object - // (i.e. no rule matches object prefix) - // - // - replication configuration applies to the object (i.e. 
a rule matches - // object prefix) but the status is disabled - // - // - object owner is an internal service account like Lifecycle, - // unless the account properties explicitly allow it to - // replicate like MD ingestion (because we do not want to - // replicate objects created from actions triggered by internal - // services, by design) - - if (config) { - let doReplicate = false; - if (!authInfo || !isServiceAccount(authInfo.getCanonicalID())) { - doReplicate = true; - } else { - const serviceAccountProps = getServiceAccountProperties( - authInfo.getCanonicalID()); - doReplicate = serviceAccountProps.canReplicate; + const activeRules = config.rules.filter(r => r.enabled && objKey.startsWith(r.prefix)); + if (activeRules.length === 0) { + return undefined; + } + + // Expand each rule's storageClass (which may be a comma-separated + // list in the legacy multi-destination form) into one (site, rule) + // pair per destination site. + const items = []; + for (const rule of activeRules) { + const storageClassString = _getStorageClass(s3config, rule); + if (!storageClassString) { + continue; + } + for (const raw of storageClassString.split(',')) { + // Strip the optional `:preferred_read` suffix. + const site = raw.split(':')[0]; + items.push({ site, rule }); + } + } + if (items.length === 0) { + return undefined; + } + + // Dedup by site: when multiple rules target the same site, the + // highest-priority rule wins. Rules without a priority are treated + // as the lowest priority; ties keep the first occurrence. + const bySite = new Map(); + for (const item of items) { + const cur = bySite.get(item.site); + const newP = item.rule.priority ?? -Infinity; + const curP = cur?.rule.priority ?? 
-Infinity; + if (!cur || newP > curP) { + bySite.set(item.site, item); } - if (doReplicate) { - const rule = config.rules.find( - rule => (objKey.startsWith(rule.prefix) && rule.enabled)); - if (rule) { - // TODO CLDSRV-646 : should "merge" the replicationInfo for different rules - return _getReplicationInfo( - s3config, rule, config, content, operationType, objectMD, bucketMD); + } + + const { destination: destRoleTemplate } = _splitRole(config.role); + let hasCloudBackend = false; + + const backends = []; + for (const [site, { rule }] of bySite) { + const isCloud = !!replicationBackends[s3config.locationConstraints[site]?.type]; + + const opts = {}; + if (isCloud) { + hasCloudBackend = true; + } else { + // CRR backend: stamp resolved destination bucket and role + // so backbeat can perform the replication deterministically. + const destination = rule.destination ?? config.destination; + const role = rule.role ?? destRoleTemplate; + if (destination) { + opts.destination = destination; + } + if (role) { + opts.role = role; } } + backends.push(_getBackend(objectMD, site, opts)); } - return undefined; + + const content = (isMD || objSize === 0) ? ['METADATA'] : ['DATA', 'METADATA']; + if (hasCloudBackend && operationType) { + content.push(operationType); + } + + return { + status: 'PENDING', + backends, + content, + role: _splitRole(config.role).source, + isNFS: bucketMD.isNFS(), + }; } module.exports = getReplicationInfo; diff --git a/lib/metadata/acl.js b/lib/metadata/acl.js index f48ab7aa42..7dd49b7fa8 100644 --- a/lib/metadata/acl.js +++ b/lib/metadata/acl.js @@ -44,16 +44,31 @@ const acl = { objectMD.acl = addACLParams; objectMD.originOp = 's3:ObjectAcl:Put'; - // Use storageType to determine if replication update is needed, as it is set only for - // "cloud" locations. This ensures that we reset replication when CRR is used, but not - // when multi-backend replication (i.e. Zenko) is used. 
- // TODO: this should be refactored to properly update the replication info, accounting - // for multiple rules and resetting the status only if needed CLDSRV-646 + // Rebuild replication info from the current bucket config to + // pick up any new destinations. CRR backends carry a + // resolved destination role on the entry (backbeat needs it + // to authenticate on the destination side); cloud backends + // don't (credentials live in the location configuration). + // For cloud backends, ACL replication is not supported, so + // preserve their existing status instead of resetting to + // PENDING. + const isCRR = b => !!b.role; + const replicationInfo = getReplicationInfo(config, objectKey, bucket, true); - if (replicationInfo && !replicationInfo.storageType) { + if (replicationInfo && replicationInfo.backends.some(isCRR)) { + const backends = replicationInfo.backends.map(b => { + if (isCRR(b)) { + return b; + } + + const existing = objectMD.replicationInfo.backends.find(e => e.site === b.site); + return existing || b; + }); + objectMD.replicationInfo = { ...objectMD.replicationInfo, ...replicationInfo, + backends, }; } @@ -171,4 +186,3 @@ const acl = { }; module.exports = acl; - diff --git a/tests/unit/api/apiUtils/getReplicationInfo.js b/tests/unit/api/apiUtils/getReplicationInfo.js index d8bec4c1e4..abc2360b61 100644 --- a/tests/unit/api/apiUtils/getReplicationInfo.js +++ b/tests/unit/api/apiUtils/getReplicationInfo.js @@ -5,13 +5,13 @@ const AuthInfo = require('arsenal').auth.AuthInfo; const getReplicationInfo = require('../../../../lib/api/apiUtils/object/getReplicationInfo'); -function _getObjectReplicationInfo(s3config, replicationConfig) { +function _getObjectReplicationInfo(s3config, replicationConfig, key, objectMD) { const bucketInfo = new BucketInfo( 'testbucket', 'someCanonicalId', 'accountDisplayName', new Date().toJSON(), null, null, null, null, null, null, null, null, null, replicationConfig); - return getReplicationInfo(s3config, 'fookey', 
bucketInfo, true, 123, null, null); + return getReplicationInfo(s3config, key || 'fookey', bucketInfo, true, 123, null, objectMD || null); } const TEST_CONFIG = { @@ -39,6 +39,9 @@ const TEST_CONFIG = { azureContainerName: 's3test' } }, + 'crr-site': { + objectId: 'crr-site', + }, }, replicationEndpoints: [{ site: 'zenko', @@ -50,276 +53,465 @@ const TEST_CONFIG = { }], }; +const TWO_PART_ROLE = 'arn:aws:iam::root:role/src-role,arn:aws:iam::root:role/dst-role'; + describe('getReplicationInfo helper', () => { - it('should get replication info when rules are enabled', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', + describe('V1 format (single rule match)', () => { + it('should get replication info when rules are enabled', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend', + }], + destination: 'tosomewhere', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend', - role: 'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3', - isNFS: undefined, + backends: [{ + site: 'awsbackend', + status: 'PENDING', + dataStoreVersionId: '', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); }); - }); - it('should not get replication info when rules are disabled', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - 
enabled: false, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, undefined); - }); + it('should not get replication info when rules are disabled', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: false, + storageClass: 'awsbackend', + }], + destination: 'tosomewhere', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); + assert.deepStrictEqual(replicationInfo, undefined); + }); + + it('should match all V1 rules with overlapping prefixes', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [ + { + prefix: '', + enabled: true, + storageClass: 'awsbackend', + }, + { + prefix: '', + enabled: true, + storageClass: 'azurebackend', + }, + ], + destination: 'tosomewhere', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); + assert.strictEqual(replicationInfo.backends.length, 2); + assert.deepStrictEqual(replicationInfo.backends.map(b => b.site).sort(), + ['awsbackend', 'azurebackend']); + }); - it('should get replication info with single cloud target', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', + it('should get replication info with multiple cloud targets (legacy comma-separated)', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend,azurebackend', + }], + destination: 'tosomewhere', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, 
replicationConfig); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend', - role: 'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3', - isNFS: undefined, + backends: [{ + site: 'awsbackend', + status: 'PENDING', + dataStoreVersionId: '', + }, { + site: 'azurebackend', + status: 'PENDING', + dataStoreVersionId: '', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); }); - }); - it('should get replication info with multiple cloud targets', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend,azurebackend', - }], - destination: 'tosomewhere', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', + it('should get replication info with multiple cloud targets and ' + + 'preferred read location', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend:preferred_read,azurebackend', + }], + destination: 'tosomewhere', + preferredReadLocation: 'awsbackend', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }, { - site: 'azurebackend', + backends: [{ + site: 'awsbackend', + status: 'PENDING', + dataStoreVersionId: '', + }, { + site: 'azurebackend', + status: 'PENDING', + dataStoreVersionId: '', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); + }); + + it('should not get replication info when service account type ' + + 'cannot trigger replication', () => { + const replicationConfig = { + 
role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend', + }], + destination: 'tosomewhere', + }; + const bucketInfo = new BucketInfo( + 'testbucket', 'abcdef/lifecycle', 'Lifecycle Service Account', + new Date().toJSON(), + null, null, null, null, null, null, null, null, null, + replicationConfig); + const authInfo = new AuthInfo({ + canonicalID: 'abcdef/lifecycle', + accountDisplayName: 'Lifecycle Service Account', + }); + const replicationInfo = getReplicationInfo(TEST_CONFIG, + 'fookey', bucketInfo, true, 123, null, null, authInfo); + assert.deepStrictEqual(replicationInfo, undefined); + }); + + it('should get replication info when service account type can ' + + 'trigger replication', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend', + }], + destination: 'tosomewhere', + }; + const bucketInfo = new BucketInfo( + 'testbucket', 'abcdef/md-ingestion', + 'Metadata Ingestion Service Account', + new Date().toJSON(), + null, null, null, null, null, null, null, null, null, + replicationConfig); + const authInfo = new AuthInfo({ + canonicalID: 'abcdef/md-ingestion', + accountDisplayName: 'Metadata Ingestion Service Account', + }); + const replicationInfo = getReplicationInfo(TEST_CONFIG, + 'fookey', bucketInfo, true, 123, null, null, authInfo); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend,azurebackend', - role: 'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3,azure', - isNFS: undefined, + backends: [{ + site: 'awsbackend', + status: 'PENDING', + dataStoreVersionId: '', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); }); - }); - it('should get replication info with multiple cloud targets and ' + - 'preferred read location', () => { - const 
replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend:preferred_read,azurebackend', - }], - destination: 'tosomewhere', - preferredReadLocation: 'awsbackend', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', + it('should fall back to default StorageClass and resolve as a CRR backend', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + }], + destination: 'tosomewhere', + }; + const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }, { - site: 'azurebackend', + backends: [{ + site: 'zenko', + status: 'PENDING', + dataStoreVersionId: '', + destination: 'tosomewhere', + role: 'arn:aws:iam::root:role/dst-role', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); + }); + + it('should return replication info with cloud backend even when no replication endpoint is configured', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + storageClass: 'awsbackend', + }], + destination: 'tosomewhere', + }; + const configWithNoReplicationEndpoint = { + locationConstraints: TEST_CONFIG.locationConstraints, + replicationEndpoints: [], + }; + const replicationInfo = _getObjectReplicationInfo(configWithNoReplicationEndpoint, + replicationConfig); + assert.deepStrictEqual(replicationInfo, { status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend:preferred_read,azurebackend', - role: 'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3,azure', - isNFS: undefined, + backends: [{ + site: 
'awsbackend', + status: 'PENDING', + dataStoreVersionId: '', + }], + content: ['METADATA'], + role: 'arn:aws:iam::root:role/src-role', + isNFS: undefined, + }); }); - }); - it('should not get replication info when service account type ' + - 'cannot trigger replication', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const bucketInfo = new BucketInfo( - 'testbucket', 'abcdef/lifecycle', 'Lifecycle Service Account', - new Date().toJSON(), - null, null, null, null, null, null, null, null, null, - replicationConfig); - const authInfo = new AuthInfo({ - canonicalID: 'abcdef/lifecycle', - accountDisplayName: 'Lifecycle Service Account', + it('should return undefined with default StorageClass if no replication endpoint is configured', () => { + const replicationConfig = { + role: TWO_PART_ROLE, + rules: [{ + prefix: '', + enabled: true, + }], + destination: 'tosomewhere', + }; + const configWithNoReplicationEndpoint = { + locationConstraints: TEST_CONFIG.locationConstraints, + replicationEndpoints: [], + }; + const replicationInfo = _getObjectReplicationInfo(configWithNoReplicationEndpoint, + replicationConfig); + assert.deepStrictEqual(replicationInfo, undefined); }); - const replicationInfo = getReplicationInfo(TEST_CONFIG, - 'fookey', bucketInfo, true, 123, null, null, authInfo); - assert.deepStrictEqual(replicationInfo, undefined); }); - it('should get replication info when service account type can ' + - 'trigger replication', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const bucketInfo = new BucketInfo( - 'testbucket', 'abcdef/md-ingestion', - 'Metadata Ingestion Service Account', - new Date().toJSON(), - null, null, null, null, null, null, null, 
null, null, - replicationConfig); - const authInfo = new AuthInfo({ - canonicalID: 'abcdef/md-ingestion', - accountDisplayName: 'Metadata Ingestion Service Account', + // --- V2 Format Tests (multi-rule matching) --- + describe('V2 format (multi-rule matching)', () => { + const V2_ROLE = 'arn:aws:iam::123456:role/src-role,arn:aws:iam::111111:role/dst-role'; + + it('should match all rules with overlapping prefixes', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-a', + role: 'arn:aws:iam::222222:role/dst-role', + }, + { + prefix: 'docs', + enabled: true, + priority: 2, + storageClass: 'azurebackend', + destination: 'arn:aws:s3:::bucket-b', + role: 'arn:aws:iam::333333:role/dst-role', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'docs/report.pdf'); + assert.strictEqual(replicationInfo.status, 'PENDING'); + assert.strictEqual(replicationInfo.backends.length, 2); + const awsBackend = replicationInfo.backends.find(b => b.site === 'awsbackend'); + const azureBackend = replicationInfo.backends.find(b => b.site === 'azurebackend'); + assert.ok(awsBackend); + assert.ok(azureBackend); + // Cloud backends do not carry per-backend destination/role + // or storageType (location type is resolved from config). 
+ assert.strictEqual(awsBackend.destination, undefined); + assert.strictEqual(awsBackend.role, undefined); + assert.strictEqual(awsBackend.storageType, undefined); + assert.strictEqual(azureBackend.destination, undefined); + assert.strictEqual(azureBackend.role, undefined); + assert.strictEqual(azureBackend.storageType, undefined); }); - const replicationInfo = getReplicationInfo(TEST_CONFIG, - 'fookey', bucketInfo, true, 123, null, null, authInfo); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', - status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend', - role: 'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3', - isNFS: undefined, + + it('should only match rules whose prefix matches the object key', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-a', + }, + { + prefix: 'logs', + enabled: true, + priority: 2, + storageClass: 'azurebackend', + destination: 'arn:aws:s3:::bucket-b', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'docs/report.pdf'); + assert.strictEqual(replicationInfo.backends.length, 1); + assert.strictEqual(replicationInfo.backends[0].site, 'awsbackend'); }); - }); - it('should get replication info with default StorageClass when rules are enabled', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role-1,arn:aws:iam::root:role/s3-replication-role-2', - rules: [{ - prefix: '', - enabled: true, - }], - destination: 'tosomewhere', - }; - const replicationInfo = _getObjectReplicationInfo(TEST_CONFIG, replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'zenko', - status: 'PENDING', - dataStoreVersionId: '', - }], - content: 
['METADATA'], - destination: 'tosomewhere', - storageClass: 'zenko', - role: 'arn:aws:iam::root:role/s3-replication-role-1,arn:aws:iam::root:role/s3-replication-role-2', - storageType: '', - isNFS: undefined, + it('should deduplicate by site using highest priority for CRR site', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'crr-site', + destination: 'arn:aws:s3:::bucket-a', + role: 'arn:aws:iam::111111:role/dst-role', + }, + { + prefix: 'docs', + enabled: true, + priority: 5, + storageClass: 'crr-site', + destination: 'arn:aws:s3:::bucket-b', + role: 'arn:aws:iam::222222:role/dst-role', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'docs/report.pdf'); + assert.strictEqual(replicationInfo.backends.length, 1); + assert.strictEqual(replicationInfo.backends[0].site, 'crr-site'); + // Priority 5 rule should win + assert.strictEqual(replicationInfo.backends[0].destination, + 'arn:aws:s3:::bucket-b'); + assert.strictEqual(replicationInfo.backends[0].role, + 'arn:aws:iam::222222:role/dst-role'); }); - }); - it('should return undefined with specified StorageClass mode if no replication endpoint is configured', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role', - rules: [{ - prefix: '', - enabled: true, - storageClass: 'awsbackend', - }], - destination: 'tosomewhere', - }; - const configWithNoReplicationEndpoint = { - locationConstraints: TEST_CONFIG.locationConstraints, - replicationEndpoints: [], - }; - const replicationInfo = _getObjectReplicationInfo(configWithNoReplicationEndpoint, - replicationConfig); - assert.deepStrictEqual(replicationInfo, { - status: 'PENDING', - backends: [{ - site: 'awsbackend', - status: 'PENDING', - dataStoreVersionId: '', - }], - content: ['METADATA'], - destination: 'tosomewhere', - storageClass: 'awsbackend', - role: 
'arn:aws:iam::root:role/s3-replication-role', - storageType: 'aws_s3', - isNFS: undefined, + it('should skip disabled rules', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-a', + }, + { + prefix: '', + enabled: false, + priority: 2, + storageClass: 'azurebackend', + destination: 'arn:aws:s3:::bucket-b', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'docs/report.pdf'); + assert.strictEqual(replicationInfo.backends.length, 1); + assert.strictEqual(replicationInfo.backends[0].site, 'awsbackend'); + }); + + it('should return undefined when no V2 rules match', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: 'logs/', + enabled: true, + priority: 1, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-a', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'docs/report.pdf'); + assert.strictEqual(replicationInfo, undefined); }); - }); - it('should return undefined with default StorageClass if no replication endpoint is configured', () => { - const replicationConfig = { - role: 'arn:aws:iam::root:role/s3-replication-role-1,arn:aws:iam::root:role/s3-replication-role-2', - rules: [{ - prefix: '', - enabled: true, - }], - destination: 'tosomewhere', - }; - const configWithNoReplicationEndpoint = { - locationConstraints: TEST_CONFIG.locationConstraints, - replicationEndpoints: [], - }; - const replicationInfo = _getObjectReplicationInfo(configWithNoReplicationEndpoint, - replicationConfig); - assert.deepStrictEqual(replicationInfo, undefined); + it('should set top-level role to the source role only', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-a', + }, + ], + 
}; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'fookey'); + assert.strictEqual(replicationInfo.role, + 'arn:aws:iam::123456:role/src-role'); + }); + + it('should handle mixed CRR and cloud backends', () => { + const replicationConfig = { + role: V2_ROLE, + rules: [ + { + prefix: '', + enabled: true, + priority: 1, + storageClass: 'crr-site', + destination: 'arn:aws:s3:::bucket-a', + role: 'arn:aws:iam::222222:role/dst-role', + }, + { + prefix: '', + enabled: true, + priority: 2, + storageClass: 'awsbackend', + destination: 'arn:aws:s3:::bucket-b', + }, + ], + }; + const replicationInfo = _getObjectReplicationInfo( + TEST_CONFIG, replicationConfig, 'fookey'); + assert.strictEqual(replicationInfo.backends.length, 2); + const crrBackend = replicationInfo.backends.find(b => b.site === 'crr-site'); + const cloudBackend = replicationInfo.backends.find(b => b.site === 'awsbackend'); + assert.ok(crrBackend); + assert.ok(cloudBackend); + // CRR backend has destination/role + assert.strictEqual(crrBackend.destination, 'arn:aws:s3:::bucket-a'); + assert.strictEqual(crrBackend.role, 'arn:aws:iam::222222:role/dst-role'); + assert.strictEqual(crrBackend.storageType, undefined); + // Cloud backend has neither (location type is resolved from config) + assert.strictEqual(cloudBackend.destination, undefined); + assert.strictEqual(cloudBackend.role, undefined); + assert.strictEqual(cloudBackend.storageType, undefined); + }); }); }); diff --git a/tests/unit/api/objectReplicationMD.js b/tests/unit/api/objectReplicationMD.js index 48451b43ce..2cc69d4743 100644 --- a/tests/unit/api/objectReplicationMD.js +++ b/tests/unit/api/objectReplicationMD.js @@ -78,23 +78,16 @@ const emptyReplicationMD = { role: '', storageType: '', dataStoreVersionId: '', - isNFS: undefined, -}; -const expectedEmptyReplicationMD = { - status: '', - backends: [], - content: [], - destination: '', - storageClass: '', - role: '', - storageType: '', - dataStoreVersionId: '', 
}; // Check that the object key has the expected replication information. +// Normalizes via JSON round-trip to drop undefined-valued keys so that +// expectations don't need to know whether the MD path went through a +// metadata read (which JSON-serializes and drops undefined fields). function checkObjectReplicationInfo(key, expected) { const objectMD = metadata.keyMaps.get(bucketName).get(key); - assert.deepStrictEqual(objectMD.replicationInfo, expected); + const actual = JSON.parse(JSON.stringify(objectMD.replicationInfo)); + assert.deepStrictEqual(actual, expected); } // Put the object key and check the replication information. @@ -241,7 +234,7 @@ describe('Replication object MD without bucket replication config', () => { if (err) { return done(err); } - checkObjectReplicationInfo(keyA, expectedEmptyReplicationMD); + checkObjectReplicationInfo(keyA, emptyReplicationMD); return done(); })); @@ -252,7 +245,7 @@ describe('Replication object MD without bucket replication config', () => { ], err => done(err))); it('should not update object metadata if putting tag', done => { - checkObjectReplicationInfo(keyA, expectedEmptyReplicationMD); + checkObjectReplicationInfo(keyA, emptyReplicationMD); return done(); }); @@ -266,7 +259,7 @@ describe('Replication object MD without bucket replication config', () => { if (err) { return done(err); } - checkObjectReplicationInfo(keyA, expectedEmptyReplicationMD); + checkObjectReplicationInfo(keyA, emptyReplicationMD); return done(); })); @@ -275,7 +268,7 @@ describe('Replication object MD without bucket replication config', () => { if (err) { return done(err); } - checkObjectReplicationInfo(keyA, expectedEmptyReplicationMD); + checkObjectReplicationInfo(keyA, emptyReplicationMD); return done(); })); @@ -299,18 +292,16 @@ describe('Replication object MD without bucket replication config', () => { site: 'zenko', status: 'PENDING', dataStoreVersionId: '', + destination: bucketARN, + role: 
'arn:aws:iam::account-id:role/dest-resource', }], content: ['DATA', 'METADATA'], - destination: bucketARN, - storageClass: 'zenko', - role: 'arn:aws:iam::account-id:role/src-resource,' + - 'arn:aws:iam::account-id:role/dest-resource', + storageClass: '', + role: 'arn:aws:iam::account-id:role/src-resource', storageType: '', dataStoreVersionId: '', - isNFS: undefined, }; - const newReplicationMD = hasStorageClass ? Object.assign(replicationMD, - { storageClass: storageClassType }) : replicationMD; + const newReplicationMD = replicationMD; const replicateMetadataOnly = Object.assign({}, newReplicationMD, { content: ['METADATA'] }); @@ -393,7 +384,14 @@ describe('Replication object MD without bucket replication config', () => { type: 'aws_s3', }; - const replicationMD = { ...newReplicationMD, storageType: 'aws_s3' }; + const replicationMD = { + ...newReplicationMD, + backends: [{ + site: 'zenko', + status: 'PENDING', + dataStoreVersionId: '', + }], + }; let completedReplicationInfo; async.series([ @@ -550,30 +548,21 @@ describe('Replication object MD without bucket replication config', () => { 'azurebackend', 'gcpbackend', 'awsbackend,azurebackend'].forEach(backend => { - const storageTypeMap = { - 'awsbackend': 'aws_s3', - 'azurebackend': 'azure', - 'gcpbackend': 'gcp', - 'awsbackend,azurebackend': 'aws_s3,azure', - }; - const storageType = storageTypeMap[backend]; const backends = backend.split(',').map(site => ({ site, status: 'PENDING', dataStoreVersionId: '', })); - describe('Object metadata replicationInfo storageType value', + describe('Object metadata replicationInfo for cloud backends', () => { const expectedReplicationInfo = { status: 'PENDING', backends, content: ['DATA', 'METADATA'], - destination: 'arn:aws:s3:::destination-bucket', - storageClass: backend, + storageClass: '', role: 'arn:aws:iam::account-id:role/resource', - storageType, + storageType: '', dataStoreVersionId: '', - isNFS: undefined, }; // Expected for a metadata-only replication operation 
(for @@ -584,8 +573,7 @@ describe('Replication object MD without bucket replication config', () => { beforeEach(() => // We have already created the bucket, so update the // replication configuration to include a location - // constraint for the `storageClass`. This results in a - // `storageType` of 'aws_s3', for example. + // constraint for the storage class. Object.assign(metadata.buckets.get(bucketName), { _replicationConfiguration: { role: 'arn:aws:iam::account-id:role/resource', @@ -718,3 +706,141 @@ describe('Replication object MD without bucket replication config', () => { }); }); }); + +describe('Replication object MD with CRR and cloud destinations on the same object', () => { + const crrSite = 'crr-site'; + const cloudSite = 'awsbackend'; + const crrRule = { + id: 'rule-crr', + prefix: keyA, + enabled: true, + priority: 1, + storageClass: crrSite, + destination: 'arn:aws:s3:::crr-bucket', + }; + const cloudRule = { + id: 'rule-cloud', + prefix: keyA, + enabled: true, + priority: 2, + storageClass: cloudSite, + destination: 'arn:aws:s3:::aws-bucket', + }; + + function setupBucket(rules) { + cleanup(); + createBucket(); + config.locationConstraints[crrSite] = { type: '' }; + Object.assign(metadata.buckets.get(bucketName), { + _versioningConfiguration: { status: 'Enabled' }, + _replicationConfiguration: { + role: 'arn:aws:iam::account-id:role/src-role,' + + 'arn:aws:iam::account-id:role/dst-role', + rules, + }, + }); + } + + function completeAllBackends() { + const objectMD = metadata.keyMaps.get(bucketName).get(keyA); + objectMD.replicationInfo.status = 'COMPLETED'; + objectMD.replicationInfo.backends.forEach(b => { + // eslint-disable-next-line no-param-reassign + b.status = 'COMPLETED'; + }); + } + + afterEach(() => { + cleanup(); + delete config.locationConstraints[crrSite]; + }); + + it('should reset only the CRR backend to PENDING on putObjectACL, ' + + 'preserving the completed status of the cloud backend', done => { + setupBucket([crrRule, 
cloudRule]); + async.series([ + next => objectPut(authInfo, getObjectPutReq(keyA, true), + undefined, log, next), + next => { + completeAllBackends(); + return objectPutACL(authInfo, objectACLReq, log, next); + }, + ], err => { + if (err) { + return done(err); + } + const objectMD = metadata.keyMaps.get(bucketName).get(keyA); + const crrBackend = objectMD.replicationInfo.backends + .find(b => b.site === crrSite); + const cloudBackend = objectMD.replicationInfo.backends + .find(b => b.site === cloudSite); + // CRR backend is re-kicked: status reset to PENDING with the + // resolved destination role stamped on the entry. + assert.strictEqual(crrBackend.status, 'PENDING'); + assert.strictEqual(crrBackend.role, + 'arn:aws:iam::account-id:role/dst-role'); + assert.strictEqual(crrBackend.destination, + 'arn:aws:s3:::crr-bucket'); + // Cloud backend is left alone: no ACL replication for cloud, + // and no resolved role/destination on the entry. + assert.strictEqual(cloudBackend.status, 'COMPLETED'); + assert.strictEqual(cloudBackend.role, undefined); + assert.strictEqual(cloudBackend.destination, undefined); + return done(); + }); + }); + + it('should not touch replicationInfo when no CRR backend is present', + done => { + setupBucket([cloudRule]); + async.series([ + next => objectPut(authInfo, getObjectPutReq(keyA, true), + undefined, log, next), + next => { + completeAllBackends(); + return objectPutACL(authInfo, objectACLReq, log, next); + }, + ], err => { + if (err) { + return done(err); + } + const objectMD = metadata.keyMaps.get(bucketName).get(keyA); + // Status untouched because nothing to ACL-replicate. 
+ assert.strictEqual(objectMD.replicationInfo.status, 'COMPLETED'); + objectMD.replicationInfo.backends.forEach(b => { + assert.strictEqual(b.status, 'COMPLETED'); + }); + return done(); + }); + }); + + it('should add a newly configured CRR destination to backends on ' + + 'putObjectACL', done => { + setupBucket([cloudRule]); + async.series([ + next => objectPut(authInfo, getObjectPutReq(keyA, true), + undefined, log, next), + next => { + completeAllBackends(); + // Operator adds a CRR destination after the object was + // already replicated to cloud. + metadata.buckets.get(bucketName) + ._replicationConfiguration.rules.unshift(crrRule); + return objectPutACL(authInfo, objectACLReq, log, next); + }, + ], err => { + if (err) { + return done(err); + } + const objectMD = metadata.keyMaps.get(bucketName).get(keyA); + const crrBackend = objectMD.replicationInfo.backends + .find(b => b.site === crrSite); + const cloudBackend = objectMD.replicationInfo.backends + .find(b => b.site === cloudSite); + assert.ok(crrBackend, 'new CRR backend should be added'); + assert.strictEqual(crrBackend.status, 'PENDING'); + assert.strictEqual(cloudBackend.status, 'COMPLETED'); + return done(); + }); + }); +});