From 652e09ca0a37ba9762bb612b4a1466611bc53236 Mon Sep 17 00:00:00 2001 From: Nick Miles Date: Tue, 11 Nov 2025 10:20:03 +0000 Subject: [PATCH] ENG-932 Restore to RDS in source for testing Remove the thin wrapper Patching Fixes to deployment Restructured for correct iam role and change of params Updating permissions for RDS copy Access permissions Formatting Permission updates to include tagging Fixes for KMS RDS Resources Fix resources for copy Fix resources for copy Fix kms for roles Added extra KMS key statements Updated example --- examples/destination/aws-backups.tf | 46 ++++ examples/source/aws-backups.tf | 3 + modules/aws-backup-destination/iam.tf | 233 +++++++++++++----- .../parameter_store_kms.tf | 95 +++++++ .../aws-backup-source/backup_vault_policy.tf | 5 +- .../lambda_copy_recovery_point.tf | 22 ++ .../lambda_restore_to_rds.tf | 83 +++++++ .../resources/restore-to-rds/README.md | 78 ++++++ .../restore-to-rds/restore_to_rds.py | 170 +++++++++++++ .../restore-to-rds/test_restore_to_rds.py | 99 ++++++++ modules/aws-backup-source/variables.tf | 17 ++ 11 files changed, 782 insertions(+), 69 deletions(-) create mode 100644 modules/aws-backup-source/lambda_restore_to_rds.tf create mode 100644 modules/aws-backup-source/resources/restore-to-rds/README.md create mode 100644 modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py create mode 100644 modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py diff --git a/examples/destination/aws-backups.tf b/examples/destination/aws-backups.tf index a43fae9..5ebf2f9 100644 --- a/examples/destination/aws-backups.tf +++ b/examples/destination/aws-backups.tf @@ -3,6 +3,12 @@ provider "aws" { region = "eu-west-2" } +variable "name_prefix" { + description = "Optional name prefix used by destination module for IAM role names" + type = string + default = "" +} + variable "source_terraform_role_arn" { description = "ARN of the terraform role in the source account" type = string @@ -21,6 +27,8 @@ 
locals { source_account_id = data.aws_arn.source_terraform_role.account destination_account_id = data.aws_caller_identity.current.account_id + + copy_recovery_role_name = var.name_prefix != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point" } @@ -41,6 +49,43 @@ resource "aws_kms_key" "destination_backup_key" { } Action = "kms:*" Resource = "*" + }, + { + Sid = "AllowCrossAccountBackupKeyOperations" + Effect = "Allow" + Principal = { + AWS = [ + "arn:aws:iam::${local.destination_account_id}:role/${local.copy_recovery_role_name}", + "arn:aws:iam::${local.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + Action = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ] + Resource = "*" + }, + { + Sid = "AllowCrossAccountBackupGrants" + Effect = "Allow" + Principal = { + AWS = [ + "arn:aws:iam::${local.destination_account_id}:role/${local.copy_recovery_role_name}", + "arn:aws:iam::${local.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + Action = [ + "kms:CreateGrant" + ] + Resource = "*" + Condition = { + Bool = { + "kms:GrantIsForAWSResource" = "true" + } + } } ] }) @@ -52,6 +97,7 @@ module "destination" { source_account_name = "source" # please note that the assigned value would be the prefix in aws_backup_vault.vault.name account_id = local.destination_account_id source_account_id = local.source_account_id + name_prefix = var.name_prefix kms_key = aws_kms_key.destination_backup_key.arn enable_vault_protection = false enable_iam_protection = false diff --git a/examples/source/aws-backups.tf b/examples/source/aws-backups.tf index bd4893c..335be4c 100644 --- a/examples/source/aws-backups.tf +++ b/examples/source/aws-backups.tf @@ -203,5 +203,8 @@ module "source" { lambda_copy_recovery_point_enable = var.copy_recovery_point_role_arn != "" lambda_copy_recovery_point_assume_role_arn = 
var.copy_recovery_point_role_arn + # Enable RDS restore Lambda + lambda_restore_to_rds_enable = true + } diff --git a/modules/aws-backup-destination/iam.tf b/modules/aws-backup-destination/iam.tf index 8596fb4..fdef8a3 100644 --- a/modules/aws-backup-destination/iam.tf +++ b/modules/aws-backup-destination/iam.tf @@ -4,100 +4,197 @@ ############################################# locals { - copy_recovery_role_name = coalesce(var.name_prefix, "") != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point" + copy_recovery_role_name = coalesce(var.name_prefix, "") != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point" } data "aws_iam_policy_document" "copy_recovery_point_assume" { - count = var.enable_cross_account_vault_access ? 1 : 0 + count = var.enable_cross_account_vault_access ? 1 : 0 - statement { - effect = "Allow" - principals { - type = "AWS" - identifiers = ["arn:aws:iam::${var.source_account_id}:root"] - } - actions = ["sts:AssumeRole"] - } + statement { + effect = "Allow" + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${var.source_account_id}:root"] + } + actions = ["sts:AssumeRole"] + } - # Allow AWS Backup service to assume when executing StartCopyJob in this account - statement { - effect = "Allow" - principals { - type = "Service" - identifiers = ["backup.amazonaws.com"] - } - actions = ["sts:AssumeRole"] - } + # Allow AWS Backup service to assume when executing StartCopyJob in this account + statement { + effect = "Allow" + principals { + type = "Service" + identifiers = [ + "backup.amazonaws.com", + "rds.amazonaws.com" + ] + } + actions = ["sts:AssumeRole"] + } } resource "aws_iam_role" "copy_recovery_point" { - count = var.enable_cross_account_vault_access ? 
1 : 0 - name = local.copy_recovery_role_name - assume_role_policy = data.aws_iam_policy_document.copy_recovery_point_assume[0].json - description = "Role assumed by source account lambda to start and describe AWS Backup copy jobs, also passed to AWS Backup service for execution" - tags = { - ModuleComponent = "aws-backup-destination" - Purpose = "copy-recovery-point-cross-account" - } + count = var.enable_cross_account_vault_access ? 1 : 0 + name = local.copy_recovery_role_name + assume_role_policy = data.aws_iam_policy_document.copy_recovery_point_assume[0].json + description = "Role assumed by source account lambda to start and describe AWS Backup copy jobs, also passed to AWS Backup service for execution" + tags = { + ModuleComponent = "aws-backup-destination" + Purpose = "copy-recovery-point-cross-account" + } } data "aws_iam_policy_document" "copy_recovery_point_permissions" { - count = var.enable_cross_account_vault_access ? 1 : 0 + count = var.enable_cross_account_vault_access ? 1 : 0 - # Start copy job (resource-level supports recoveryPoint*) - statement { - effect = "Allow" - actions = [ - "backup:StartCopyJob" - ] - # Recovery points originate from the source account; allow any recovery point ARN pattern for that account & any region used via var.region - resources = ["arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*"] - } + # Start copy job (resource-level supports recoveryPoint*) + statement { + effect = "Allow" + actions = [ + "backup:StartCopyJob" + ] + # Recovery points originate from the source account; allow any recovery point ARN pattern for that account & any region used via var.region + resources = ["arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*"] + } - # Describe copy job (no resource-level restriction) - statement { - effect = "Allow" - actions = [ - "backup:DescribeCopyJob" + # Describe copy job (no resource-level restriction) + statement { + sid = "BackupServicePermissions" + effect = "Allow" + actions = [ + 
"backup:StartCopyJob", + "backup:CopyIntoBackupVault", + "backup:DescribeCopyJob", + "backup:DescribeBackupVault", + "backup:DescribeRecoveryPoint", + "backup:DescribeBackupJob", + "backup:GetBackupVaultAccessPolicy", + "backup:StopBackupJob", + "backup:ListRecoveryPointsByBackupVault", + "backup:ListCopyJobs", + "backup:GetRecoveryPointRestoreMetadata", + "backup:UpdateRecoveryPointLifecycle", + "backup:PutBackupVaultAccessPolicy", + "backup:ListRecoveryPointsByResource", + "backup:GetBackupPlan", + "backup:ListBackupJobs", + "backup:TagResource", + "backup:UntagResource", + "backup:ListTags", + "backup:ListBackupVaults", + "backup:CreateBackupVault", + "backup:GetBackupVaultNotifications", + "backup:PutBackupVaultNotifications", + "backup:DescribeProtectedResource", + "backup:ListProtectedResources" ] - resources = ["*"] - } + resources = ["*"] + } - statement { - effect = "Allow" - actions = [ + statement { + sid = "CopyBackupPermissions" + effect = "Allow" + actions = [ "backup:CopyIntoBackupVault", "backup:CopyFromBackupVault" ] - resources = [ + resources = [ "arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*", "arn:aws:backup:${var.region}:${var.account_id}:backup-vault:${aws_backup_vault.vault.name}", - "arn:aws:backup:${var.region}:${var.source_account_id}:backup-vault:*" + "arn:aws:backup:${var.region}:${var.source_account_id}:backup-vault:*", + "arn:aws:rds:${var.region}:${var.account_id}:*", + "arn:aws:rds:${var.region}:${var.source_account_id}:*", + "arn:aws:s3:::*", + "arn:aws:s3:::*/*", + "arn:aws:dynamodb:${var.region}:${var.account_id}:table/*", + "arn:aws:dynamodb:${var.region}:${var.source_account_id}:table/*", + "arn:aws:ec2:${var.region}:${var.account_id}:volume/*", + "arn:aws:ec2:${var.region}:${var.source_account_id}:volume/*", + "arn:aws:ec2:${var.region}:${var.account_id}:snapshot/*", + "arn:aws:ec2:${var.region}:${var.source_account_id}:snapshot/*", + "arn:aws:efs:${var.region}:${var.account_id}:file-system/*", + 
"arn:aws:efs:${var.region}:${var.source_account_id}:file-system/*" + ] + } + + statement { + sid = "RDSPermissions" + effect = "Allow" + actions = [ + "rds:CopyDBSnapshot", + "rds:DescribeDBSnapshots", + "rds:ModifyDBSnapshotAttribute", + "rds:DescribeDBInstances", + "rds:DescribeDBClusters", + "rds:CopyDBClusterSnapshot", + "rds:DescribeDBClusterSnapshots", + "rds:AddTagsToResource", + "rds:ListTagsForResource" + ] + resources = [ + "arn:aws:rds:${var.region}:${var.account_id}:db:*", + "arn:aws:rds:${var.region}:${var.account_id}:snapshot:*", + "arn:aws:rds:${var.region}:${var.account_id}:cluster:*", + "arn:aws:rds:${var.region}:${var.account_id}:cluster-snapshot:*", + "arn:aws:rds:${var.region}:${var.source_account_id}:db:*", + "arn:aws:rds:${var.region}:${var.source_account_id}:snapshot:*", + "arn:aws:rds:${var.region}:${var.source_account_id}:cluster:*", + "arn:aws:rds:${var.region}:${var.source_account_id}:cluster-snapshot:*" + ] + } + + statement { + sid = "BackupTagPermissions" + effect = "Allow" + actions = [ + "backup:TagResource" + ] + resources = [ + "arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*" + ] + } + + statement { + sid = "KMSPermissions" + effect = "Allow" + actions = [ + "kms:Decrypt", + "kms:Encrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey", + "kms:CreateGrant", + "kms:RetireGrant", + "kms:ListGrants" + ] + resources = [ + "arn:aws:kms:${var.region}:${var.account_id}:key/*" ] - } + } - # Pass this role to AWS Backup service when invoking StartCopyJob with IamRoleArn - statement { - effect = "Allow" - actions = ["iam:PassRole"] - resources = [aws_iam_role.copy_recovery_point[0].arn] - condition { - test = "StringEquals" - variable = "iam:PassedToService" - values = ["backup.amazonaws.com"] - } - } + # Pass this role to AWS Backup service when invoking StartCopyJob with IamRoleArn + statement { + sid = "IAMPermissions" + effect = "Allow" + actions = ["iam:PassRole"] + resources = 
[aws_iam_role.copy_recovery_point[0].arn] + condition { + test = "StringEquals" + variable = "iam:PassedToService" + values = ["backup.amazonaws.com"] + } + } } resource "aws_iam_role_policy" "copy_recovery_point_policy" { - count = var.enable_cross_account_vault_access ? 1 : 0 - name = "${local.copy_recovery_role_name}-policy" - role = aws_iam_role.copy_recovery_point[0].id - policy = data.aws_iam_policy_document.copy_recovery_point_permissions[0].json + count = var.enable_cross_account_vault_access ? 1 : 0 + name = "${local.copy_recovery_role_name}-policy" + role = aws_iam_role.copy_recovery_point[0].id + policy = data.aws_iam_policy_document.copy_recovery_point_permissions[0].json } output "copy_recovery_point_role_arn" { - description = "ARN of role to assume from source account lambda (set ASSUME_ROLE_ARN to this). Only present if enabled." - value = try(aws_iam_role.copy_recovery_point[0].arn, null) - depends_on = [aws_iam_role.copy_recovery_point] + description = "ARN of role to assume from source account lambda (set ASSUME_ROLE_ARN to this). Only present if enabled." + value = try(aws_iam_role.copy_recovery_point[0].arn, null) + depends_on = [aws_iam_role.copy_recovery_point] } diff --git a/modules/aws-backup-destination/parameter_store_kms.tf b/modules/aws-backup-destination/parameter_store_kms.tf index dc5e068..4497df5 100644 --- a/modules/aws-backup-destination/parameter_store_kms.tf +++ b/modules/aws-backup-destination/parameter_store_kms.tf @@ -30,6 +30,101 @@ data "aws_iam_policy_document" "kms_key_policy" { resources = ["*"] } } + + dynamic "statement" { + for_each = var.enable_cross_account_vault_access ? 
["allow_backup_key_ops"] : [] + + content { + sid = "AllowCrossAccountBackupKeyOperations" + effect = "Allow" + principals { + type = "AWS" + identifiers = [ + try(aws_iam_role.copy_recovery_point[0].arn, ""), + "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ] + resources = ["*"] + } + } + + dynamic "statement" { + for_each = var.enable_cross_account_vault_access ? ["allow_backup_grants"] : [] + + content { + sid = "AllowCrossAccountBackupGrants" + effect = "Allow" + principals { + type = "AWS" + identifiers = [ + try(aws_iam_role.copy_recovery_point[0].arn, ""), + "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + actions = [ + "kms:CreateGrant" + ] + resources = ["*"] + condition { + test = "Bool" + variable = "kms:GrantIsForAWSResource" + values = ["true"] + } + } + } + + # Additional explicit cross-account backup role permissions mirroring example policy structure + dynamic "statement" { + for_each = var.enable_cross_account_vault_access ? ["add_explicit_backup_key_ops"] : [] + content { + sid = "AllowCrossAccountBackupKeyOperationsExplicit" + effect = "Allow" + principals { + type = "AWS" + identifiers = [ + try(aws_iam_role.copy_recovery_point[0].arn, ""), + "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ] + resources = ["*"] + } + } + + dynamic "statement" { + for_each = var.enable_cross_account_vault_access ? 
["add_explicit_backup_grants"] : [] + content { + sid = "AllowCrossAccountBackupGrantsExplicit" + effect = "Allow" + principals { + type = "AWS" + identifiers = [ + try(aws_iam_role.copy_recovery_point[0].arn, ""), + "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup" + ] + } + actions = ["kms:CreateGrant"] + resources = ["*"] + condition { + test = "Bool" + variable = "kms:GrantIsForAWSResource" + values = ["true"] + } + } + } } resource "aws_kms_key" "parameter_store_key" { diff --git a/modules/aws-backup-source/backup_vault_policy.tf b/modules/aws-backup-source/backup_vault_policy.tf index 392394d..37c9302 100644 --- a/modules/aws-backup-source/backup_vault_policy.tf +++ b/modules/aws-backup-source/backup_vault_policy.tf @@ -40,7 +40,10 @@ data "aws_iam_policy_document" "vault_policy" { principals { type = "AWS" - identifiers = ["arn:aws:iam::${var.backup_copy_vault_account_id}:root"] + identifiers = [ + "arn:aws:iam::${var.backup_copy_vault_account_id}:root", + "arn:aws:iam::${var.backup_copy_vault_account_id}:role/${var.name_prefix}-copy-recovery-point" + ] } } } diff --git a/modules/aws-backup-source/lambda_copy_recovery_point.tf b/modules/aws-backup-source/lambda_copy_recovery_point.tf index 89cabbe..cf2e108 100644 --- a/modules/aws-backup-source/lambda_copy_recovery_point.tf +++ b/modules/aws-backup-source/lambda_copy_recovery_point.tf @@ -44,6 +44,28 @@ resource "aws_iam_policy" "iam_policy_for_lambda_copy_recovery_point" { Resource = "*" Effect = "Allow" }, + { + Action = [ + "kms:Decrypt", + "kms:Encrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey", + "kms:CreateGrant" + ] + Resource = "*" + Effect = "Allow" + Condition = { + Bool = { "kms:GrantIsForAWSResource" = "true" } + } + }, + { + Action = [ + "rds:DescribeDBSnapshots" + ] + Resource = "*" + Effect = "Allow" + }, { Action = ["sts:AssumeRole"] Resource = var.lambda_copy_recovery_point_assume_role_arn == "" ? 
null : var.lambda_copy_recovery_point_assume_role_arn diff --git a/modules/aws-backup-source/lambda_restore_to_rds.tf b/modules/aws-backup-source/lambda_restore_to_rds.tf new file mode 100644 index 0000000..cba4f16 --- /dev/null +++ b/modules/aws-backup-source/lambda_restore_to_rds.tf @@ -0,0 +1,83 @@ +data "archive_file" "lambda_restore_to_rds_zip" { + count = var.lambda_restore_to_rds_enable ? 1 : 0 + type = "zip" + source_dir = "${path.module}/resources/restore-to-rds/" + output_path = "${path.module}/.terraform/archive_files/lambda_restore_to_rds.zip" +} + +resource "aws_iam_role" "iam_for_lambda_restore_to_rds" { + count = var.lambda_restore_to_rds_enable ? 1 : 0 + name = "${var.name_prefix}-lambda-restore-to-rds-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { Service = "lambda.amazonaws.com" } + }] + }) +} + +resource "aws_iam_policy" "iam_policy_for_lambda_restore_to_rds" { + count = var.lambda_restore_to_rds_enable ? 1 : 0 + name = "${var.name_prefix}-lambda-restore-to-rds-policy" + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Action = [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents" + ] + Resource = "arn:aws:logs:*:*:*" + Effect = "Allow" + }, + { + Action = [ + "backup:StartRestoreJob", + "backup:DescribeRestoreJob" + ] + Resource = "*" + Effect = "Allow" + }, + { + Action = "iam:PassRole" + Resource = aws_iam_role.backup.arn + Condition = { + StringEquals = { + "iam:PassedToService" : "backup.amazonaws.com" + } + } + Effect = "Allow" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "lambda_restore_to_rds_policy_attach" { + count = var.lambda_restore_to_rds_enable ? 
1 : 0 + role = aws_iam_role.iam_for_lambda_restore_to_rds[0].name + policy_arn = aws_iam_policy.iam_policy_for_lambda_restore_to_rds[0].arn +} + +resource "aws_lambda_function" "lambda_restore_to_rds" { + count = var.lambda_restore_to_rds_enable ? 1 : 0 + function_name = "${var.name_prefix}_lambda-restore-to-rds" + role = aws_iam_role.iam_for_lambda_restore_to_rds[0].arn + handler = "restore_to_rds.lambda_handler" + runtime = "python3.12" + filename = data.archive_file.lambda_restore_to_rds_zip[0].output_path + source_code_hash = data.archive_file.lambda_restore_to_rds_zip[0].output_base64sha256 + timeout = var.lambda_restore_to_rds_max_wait_minutes * 60 + + environment { + variables = { + POLL_INTERVAL_SECONDS = var.lambda_restore_to_rds_poll_interval_seconds + MAX_WAIT_MINUTES = var.lambda_restore_to_rds_max_wait_minutes + IAM_ROLE_ARN = aws_iam_role.backup.arn + } + } +} diff --git a/modules/aws-backup-source/resources/restore-to-rds/README.md b/modules/aws-backup-source/resources/restore-to-rds/README.md new file mode 100644 index 0000000..51d97ca --- /dev/null +++ b/modules/aws-backup-source/resources/restore-to-rds/README.md @@ -0,0 +1,78 @@ +# Lambda Restore to RDS + +Starts or monitors an AWS Backup restore of an RDS recovery point into a new DB instance in the same account. + +Two modes: + +1. START: Provide required identifiers to create a new restored instance. +2. MONITOR: Provide an existing `restore_job_id` to poll until completion or timeout. 
+
+## Event Contract
+
+START example:
+
+```json
+{
+  "recovery_point_arn": "arn:aws:backup:eu-west-2:123456789012:recovery-point:ABCDEF123456",
+  "db_instance_identifier": "restored-app-db"
+}
+```
+
+Optional fields:
+
+- `db_instance_class`
+- `db_subnet_group_name`
+- `vpc_security_group_ids`
+- `restore_metadata_overrides` (JSON object, merged last; non-string values are JSON-encoded)
+- `copy_source_tags_to_restored_resource` (boolean)
+
+MONITOR mode:
+
+```json
+{ "restore_job_id": "1234abcd-job" }
+```
+
+## Environment Variables
+
+- `IAM_ROLE_ARN` – Backup service role (injected by Terraform)
+- `POLL_INTERVAL_SECONDS` – Seconds between status polls (default 30, must be greater than 0)
+- `MAX_WAIT_MINUTES` – Maximum minutes to poll before returning 202 (default 10)
+
+## Behaviour
+
+- Same-account enforcement (must copy cross-account recovery points first).
+- Supports optional copying of source backup tags.
+- Returns HTTP 200 (completed), 500 (failed/aborted), or 202 (still running after timeout).
+
+## CLI Examples
+
+Start:
+
+```bash
+AWS_PROFILE=code-ark-dev-2 aws lambda invoke \
+  --function-name "${NAME_PREFIX}_lambda-restore-to-rds" \
+  --cli-binary-format raw-in-base64-out \
+  --payload '{"recovery_point_arn":"RECOVERY_POINT_ARN","db_instance_identifier":"restored-db-1"}' \
+  rds_restore_start.json
+```
+
+Monitor:
+
+```bash
+AWS_PROFILE=code-ark-dev-2 aws lambda invoke \
+  --function-name "${NAME_PREFIX}_lambda-restore-to-rds" \
+  --cli-binary-format raw-in-base64-out \
+  --payload '{"restore_job_id":"RESTORE_JOB_ID"}' \
+  rds_restore_monitor.json
+```
+
+## Testing
+
+```bash
+python test_restore_to_rds.py
+```
+
+## Notes
+
+- Copy recovery point locally first for air-gapped workflows.
+- Use tag copying sparingly.
diff --git a/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py b/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py
new file mode 100644
index 0000000..ab1bfef
--- /dev/null
+++ b/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py
@@ -0,0 +1,237 @@
+"""Lambda to start or monitor an AWS Backup RDS restore job.
+
+Modes:
+1. START: event supplies recovery_point_arn + db_instance_identifier (+ optional metadata) → starts restore.
+2. MONITOR: event supplies restore_job_id → polls until terminal state or timeout.
+
+Parallels restore_to_s3 implementation for consistency (env-driven IAM role, polling loop, unified response).
+"""
+import json
+import logging
+import os
+import time
+
+import boto3
+from botocore.exceptions import ClientError
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+backup_client = boto3.client('backup')
+sts_client = boto3.client('sts')
+
+FINAL_STATES = ['COMPLETED', 'FAILED', 'ABORTED']
+
+# Seconds held back from the remaining invocation time so a 202 response can
+# still be returned before the Lambda runtime terminates the function.
+TIMEOUT_SAFETY_MARGIN_SECONDS = 15
+
+
+def get_job_status(restore_job_id):
+    """Return ``(status, details)`` for a restore job.
+
+    Lookup errors are reported as ``'FAILED'`` with the error text in
+    ``details['StatusMessage']`` so callers always receive a dict.
+    """
+    try:
+        job_details = backup_client.describe_restore_job(RestoreJobId=restore_job_id)
+        return job_details['Status'], job_details
+    except ClientError as e:
+        logger.error(f"Error checking job status for {restore_job_id}: {e.response['Error']['Message']}")
+        return 'FAILED', {'StatusMessage': f"API Error during status check: {e.response['Error']['Message']}"}
+    except Exception as e:
+        logger.error(f"Unexpected error checking job status: {str(e)}")
+        return 'FAILED', {'StatusMessage': f"Unexpected error: {str(e)}"}
+
+
+def wait_for_job(restore_job_id, wait_seconds, max_wait_minutes):
+    """Poll a restore job until it reaches a final state or the wait budget ends.
+
+    The job is always checked at least once, and the last observed
+    ``(status, details)`` pair is returned without a redundant extra API call.
+    """
+    # Guard against a zero/negative interval (ZeroDivisionError, busy loop).
+    wait_seconds = max(1, wait_seconds)
+    max_checks = max(1, int((max_wait_minutes * 60) / wait_seconds))
+    logger.info(f"Polling restore job {restore_job_id} for up to {max_wait_minutes} minutes...")
+    for check in range(1, max_checks + 1):
+        if check > 1:
+            logger.info(f"Waiting {wait_seconds} seconds... (Check {check}/{max_checks})")
+            time.sleep(wait_seconds)
+        current_status, job_details = get_job_status(restore_job_id)
+        percent_done = job_details.get('PercentDone', '0.00%')
+        logger.info(f"Current Status: {current_status} ({percent_done} complete)")
+        if current_status in FINAL_STATES:
+            logger.info(f"Job reached final status: {current_status}")
+            break
+    return current_status, job_details
+
+
+def lambda_handler(event, context):
+    """Start a new RDS restore job, or monitor an existing one.
+
+    Returns a dict with ``statusCode`` (200 completed, 202 still running,
+    400 bad request/config, 500 failure) and a ``body`` describing the outcome.
+    """
+    try:
+        wait_seconds = int(os.environ.get('POLL_INTERVAL_SECONDS', '30'))
+        max_wait_minutes = int(os.environ.get('MAX_WAIT_MINUTES', '10'))
+    except ValueError:
+        return {
+            'statusCode': 400,
+            'body': {'message': 'Config Error: POLL_INTERVAL_SECONDS or MAX_WAIT_MINUTES must be integers.'}
+        }
+    if wait_seconds <= 0 or max_wait_minutes < 0:
+        return {
+            'statusCode': 400,
+            'body': {'message': 'Config Error: POLL_INTERVAL_SECONDS must be > 0 and MAX_WAIT_MINUTES must be >= 0.'}
+        }
+    # Never poll for longer than the invocation has left, otherwise the Lambda
+    # is killed by its own timeout instead of returning a 202.
+    max_wait_minutes = _bounded_wait_minutes(context, max_wait_minutes)
+
+    restore_job_id = event.get('restore_job_id')
+    if restore_job_id:
+        logger.info(f"Mode: MONITOR - Tracking existing restore job: {restore_job_id}")
+        final_status, final_details = wait_for_job(restore_job_id, wait_seconds, max_wait_minutes)
+        return _format_response(restore_job_id, final_status, final_details, max_wait_minutes)
+
+    # Start new restore job
+    logger.info(f"Mode: START - Initiating new RDS restore job. Event: {event}")
+    recovery_point_arn = event.get('recovery_point_arn')
+    iam_role_arn = os.environ.get('IAM_ROLE_ARN')
+    db_instance_identifier = event.get('db_instance_identifier')
+    # ``or {}`` also covers an explicit JSON null.
+    restore_metadata_overrides = event.get('restore_metadata_overrides') or {}
+
+    if not all([recovery_point_arn, db_instance_identifier]):
+        return {
+            'statusCode': 400,
+            'body': {'message': 'Missing required parameters: recovery_point_arn, db_instance_identifier.'}
+        }
+    if not iam_role_arn:
+        return {
+            'statusCode': 500,
+            'body': {'message': 'Configuration error: IAM_ROLE_ARN environment variable not set.'}
+        }
+    if not isinstance(restore_metadata_overrides, dict):
+        return {
+            'statusCode': 400,
+            'body': {'message': 'restore_metadata_overrides must be a JSON object.'}
+        }
+
+    # Enforce same-account restore (recovery point copy expected beforehand)
+    mismatch = _account_mismatch_response(recovery_point_arn)
+    if mismatch:
+        return mismatch
+
+    start_args = {
+        'RecoveryPointArn': recovery_point_arn,
+        'Metadata': _build_restore_metadata(event, restore_metadata_overrides),
+        'IamRoleArn': iam_role_arn,
+        'IdempotencyToken': context.aws_request_id,
+        'ResourceType': 'RDS'
+    }
+    if event.get('copy_source_tags_to_restored_resource') is True:
+        start_args['CopySourceTagsToRestoredResource'] = True
+
+    try:
+        start_response = backup_client.start_restore_job(**start_args)
+        restore_job_id = start_response['RestoreJobId']
+        logger.info(f"Started RDS restore job: {restore_job_id}")
+    except ClientError as e:
+        error_message = f"Failed to start RDS restore job: {e.response['Error']['Message']}"
+        logger.error(error_message, exc_info=True)
+        return {'statusCode': 500, 'body': {'message': error_message}}
+
+    final_status, final_details = wait_for_job(restore_job_id, wait_seconds, max_wait_minutes)
+    return _format_response(restore_job_id, final_status, final_details, max_wait_minutes)
+
+
+def _bounded_wait_minutes(context, max_wait_minutes):
+    """Cap the polling window to the time left in this invocation."""
+    remaining_ms = getattr(context, 'get_remaining_time_in_millis', None)
+    if not callable(remaining_ms):
+        return max_wait_minutes
+    try:
+        budget_minutes = (remaining_ms() / 1000 - TIMEOUT_SAFETY_MARGIN_SECONDS) / 60
+    except TypeError:
+        # Test doubles may not return a number; fall back to the configured wait.
+        return max_wait_minutes
+    return max(0, min(max_wait_minutes, round(budget_minutes, 2)))
+
+
+def _account_mismatch_response(recovery_point_arn):
+    """Return a 400 response if the recovery point is in another account, else None.
+
+    Validation is best-effort: if the ARN cannot be parsed or STS is unavailable
+    the check is skipped with a warning rather than blocking the restore.
+    """
+    try:
+        rp_account_id = recovery_point_arn.split(':')[4]
+        caller_account_id = sts_client.get_caller_identity()['Account']
+    except Exception as e:
+        logger.warning(f"Account validation skipped: {e}")
+        return None
+    if rp_account_id == caller_account_id:
+        return None
+    return {
+        'statusCode': 400,
+        'body': {
+            'message': 'Recovery point account mismatch; copy to local vault via copy-recovery-point Lambda before RDS restore.',
+            'recovery_point_account': rp_account_id,
+            'lambda_account': caller_account_id
+        }
+    }
+
+
+def _build_restore_metadata(event, overrides):
+    """Build the string-to-string Metadata map for ``start_restore_job``."""
+    metadata = {'DBInstanceIdentifier': event.get('db_instance_identifier')}
+    if event.get('db_instance_class'):
+        metadata['DBInstanceClass'] = event['db_instance_class']
+    if event.get('db_subnet_group_name'):
+        metadata['DBSubnetGroupName'] = event['db_subnet_group_name']
+    vpc_security_group_ids = event.get('vpc_security_group_ids')
+    if vpc_security_group_ids:
+        if isinstance(vpc_security_group_ids, list):
+            metadata['VpcSecurityGroupIds'] = ','.join(map(str, vpc_security_group_ids))
+        else:
+            metadata['VpcSecurityGroupIds'] = vpc_security_group_ids
+    # Merge in any overrides
+    metadata.update(overrides)
+    # boto3 rejects non-string Metadata values, so JSON-encode anything else
+    # (True -> "true", 5 -> "5", lists -> JSON arrays).
+    return {
+        str(key): (value if isinstance(value, str) else json.dumps(value))
+        for key, value in metadata.items()
+    }
+
+
+def _format_response(restore_job_id, final_status, final_details, max_wait_minutes):
+    """Translate the final job state into the Lambda's response dict."""
+    if final_status == 'COMPLETED':
+        status_code = 200
+        message = 'Restore job completed successfully.'
+    elif final_status in ['FAILED', 'ABORTED']:
+        status_code = 500
+        message = f'Restore job failed/aborted. Message: {final_details.get("StatusMessage", "N/A")}'
+    else:
+        status_code = 202
+        message = f'Restore job still running after max wait ({max_wait_minutes} mins). Final check status: {final_status}.'
+    completion_raw = final_details.get('CompletionDate')
+    if completion_raw is None:
+        completion_formatted = 'N/A'
+    elif hasattr(completion_raw, 'isoformat'):
+        completion_formatted = completion_raw.isoformat()
+    else:
+        completion_formatted = str(completion_raw)
+    return {
+        'statusCode': status_code,
+        'body': {
+            'message': message,
+            'restoreJobId': restore_job_id,
+            'finalStatus': final_status,
+            'completionDate': completion_formatted
+        }
+    }
diff --git a/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py b/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py
new file mode 100644
index 0000000..c429b35
--- /dev/null
+++ b/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py
@@ -0,0 +1,115 @@
+import os
+import unittest
+from unittest.mock import patch
+import restore_to_rds as rds
+
+
+class TestRestoreToRDS(unittest.TestCase):
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '1',
+        'MAX_WAIT_MINUTES': '0'
+    })
+    @patch.object(rds, 'backup_client')
+    @patch.object(rds, 'sts_client')
+    def test_start_restore_success(self, mock_sts, mock_backup):
+        mock_sts.get_caller_identity.return_value = {'Account': '123456789012'}
+        mock_backup.describe_restore_job.return_value = {
+            'Status': 'COMPLETED',
+            'PercentDone': '100.00%',
+            'CompletionDate': rds.time.gmtime()
+        }
+        mock_backup.start_restore_job.return_value = {'RestoreJobId': 'job-123'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        context = type('ctx', (), {'aws_request_id': 'req-1'})
+        resp = rds.lambda_handler(event, context)
+        self.assertEqual(resp['statusCode'], 200)
+        self.assertEqual(resp['body']['restoreJobId'], 'job-123')
+        mock_backup.start_restore_job.assert_called_once()
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole'
+    })
+    def test_missing_required_params(self):
+        event = {'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC'}
+        context = type('ctx', (), {'aws_request_id': 'req-2'})
+        resp = rds.lambda_handler(event, context)
+        self.assertEqual(resp['statusCode'], 400)
+
+    @patch.dict(os.environ, {
+        'POLL_INTERVAL_SECONDS': '30',
+        'MAX_WAIT_MINUTES': '10'
+    })
+    def test_missing_iam_role_env(self):
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        context = type('ctx', (), {'aws_request_id': 'req-3'})
+        resp = rds.lambda_handler(event, context)
+        self.assertEqual(resp['statusCode'], 500)
+        self.assertIn('IAM_ROLE_ARN', resp['body']['message'])
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole'
+    })
+    @patch.object(rds, 'sts_client')
+    def test_cross_account_blocked(self, mock_sts):
+        mock_sts.get_caller_identity.return_value = {'Account': '999999999999'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        context = type('ctx', (), {'aws_request_id': 'req-4'})
+        resp = rds.lambda_handler(event, context)
+        self.assertEqual(resp['statusCode'], 400)
+        self.assertIn('recovery_point_account', resp['body'])
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '1',
+        'MAX_WAIT_MINUTES': '0'
+    })
+    @patch.object(rds, 'backup_client')
+    @patch.object(rds, 'sts_client')
+    def test_copy_source_tags_flag(self, mock_sts, mock_backup):
+        mock_sts.get_caller_identity.return_value = {'Account': '123456789012'}
+        mock_backup.describe_restore_job.return_value = {
+            'Status': 'COMPLETED',
+            'PercentDone': '100.00%',
+            'CompletionDate': rds.time.gmtime()
+        }
+        mock_backup.start_restore_job.return_value = {'RestoreJobId': 'job-456'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:DEF',
+            'db_instance_identifier': 'restored-db-2',
+            'copy_source_tags_to_restored_resource': True
+        }
+        context = type('ctx', (), {'aws_request_id': 'req-5'})
+        resp = rds.lambda_handler(event, context)
+        self.assertEqual(resp['statusCode'], 200)
+        called_args = mock_backup.start_restore_job.call_args[1]
+        self.assertTrue(called_args.get('CopySourceTagsToRestoredResource'))
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '0'
+    })
+    def test_non_positive_poll_interval_rejected(self):
+        context = type('ctx', (), {'aws_request_id': 'req-6'})
+        resp = rds.lambda_handler({'restore_job_id': 'job-789'}, context)
+        self.assertEqual(resp['statusCode'], 400)
+
+    @patch.object(rds, 'backup_client')
+    def test_wait_for_job_polls_once_when_already_complete(self, mock_backup):
+        mock_backup.describe_restore_job.return_value = {'Status': 'COMPLETED'}
+        status, _ = rds.wait_for_job('job-1', 1, 1)
+        self.assertEqual(status, 'COMPLETED')
+        mock_backup.describe_restore_job.assert_called_once()
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/modules/aws-backup-source/variables.tf b/modules/aws-backup-source/variables.tf
index 6ca9252..06155f7 100644
--- a/modules/aws-backup-source/variables.tf
+++ b/modules/aws-backup-source/variables.tf
@@ -1,3 +1,31 @@
+variable "lambda_restore_to_rds_enable" {
+  description = "Flag to enable the restore-to-rds lambda."
+  type        = bool
+  default     = false
+}
+
+variable "lambda_restore_to_rds_poll_interval_seconds" {
+  description = "Polling interval in seconds for RDS restore job status checks."
+  type        = number
+  default     = 30
+
+  validation {
+    condition     = var.lambda_restore_to_rds_poll_interval_seconds > 0
+    error_message = "The lambda_restore_to_rds_poll_interval_seconds value must be greater than 0."
+  }
+}
+
+variable "lambda_restore_to_rds_max_wait_minutes" {
+  description = "Maximum number of minutes to wait for an RDS restore job to reach a terminal state before returning running status."
+  type        = number
+  default     = 10
+
+  validation {
+    condition     = var.lambda_restore_to_rds_max_wait_minutes >= 1 && var.lambda_restore_to_rds_max_wait_minutes <= 15
+    error_message = "The lambda_restore_to_rds_max_wait_minutes value must be between 1 and 15 because the Lambda timeout is capped at 900 seconds."
+  }
+}
+
 variable "project_name" {
   description = "The name of the project this relates to."
   type        = string