diff --git a/examples/destination/aws-backups.tf b/examples/destination/aws-backups.tf
index a43fae9..5ebf2f9 100644
--- a/examples/destination/aws-backups.tf
+++ b/examples/destination/aws-backups.tf
@@ -3,6 +3,12 @@ provider "aws" {
   region = "eu-west-2"
 }
 
+variable "name_prefix" {
+  description = "Optional name prefix used by destination module for IAM role names"
+  type        = string
+  default     = ""
+}
+
 variable "source_terraform_role_arn" {
   description = "ARN of the terraform role in the source account"
   type        = string
@@ -21,6 +27,8 @@ locals {
 
   source_account_id      = data.aws_arn.source_terraform_role.account
   destination_account_id = data.aws_caller_identity.current.account_id
+
+  copy_recovery_role_name = var.name_prefix != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point"
 }
 
 
@@ -41,6 +49,43 @@ resource "aws_kms_key" "destination_backup_key" {
         }
         Action   = "kms:*"
         Resource = "*"
+      },
+      {
+        Sid    = "AllowCrossAccountBackupKeyOperations"
+        Effect = "Allow"
+        Principal = {
+          AWS = [
+            "arn:aws:iam::${local.destination_account_id}:role/${local.copy_recovery_role_name}",
+            "arn:aws:iam::${local.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup"
+          ]
+        }
+        Action = [
+          "kms:Encrypt",
+          "kms:Decrypt",
+          "kms:ReEncrypt*",
+          "kms:GenerateDataKey*",
+          "kms:DescribeKey"
+        ]
+        Resource = "*"
+      },
+      {
+        Sid    = "AllowCrossAccountBackupGrants"
+        Effect = "Allow"
+        Principal = {
+          AWS = [
+            "arn:aws:iam::${local.destination_account_id}:role/${local.copy_recovery_role_name}",
+            "arn:aws:iam::${local.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup"
+          ]
+        }
+        Action = [
+          "kms:CreateGrant"
+        ]
+        Resource = "*"
+        Condition = {
+          Bool = {
+            "kms:GrantIsForAWSResource" = "true"
+          }
+        }
       }
     ]
   })
@@ -52,6 +97,7 @@ module "destination" {
   source_account_name     = "source" # please note that the assigned value would be the prefix in aws_backup_vault.vault.name
   account_id              = local.destination_account_id
   source_account_id       = local.source_account_id
+  name_prefix             = var.name_prefix
   kms_key                 = aws_kms_key.destination_backup_key.arn
   enable_vault_protection = false
   enable_iam_protection   = false
diff --git a/examples/source/aws-backups.tf b/examples/source/aws-backups.tf
index bd4893c..335be4c 100644
--- a/examples/source/aws-backups.tf
+++ b/examples/source/aws-backups.tf
@@ -203,5 +203,8 @@ module "source" {
   lambda_copy_recovery_point_enable          = var.copy_recovery_point_role_arn != ""
   lambda_copy_recovery_point_assume_role_arn = var.copy_recovery_point_role_arn
 
+  # Enable RDS restore Lambda
+  lambda_restore_to_rds_enable = true
+
 }
 
diff --git a/modules/aws-backup-destination/iam.tf b/modules/aws-backup-destination/iam.tf
index 8596fb4..fdef8a3 100644
--- a/modules/aws-backup-destination/iam.tf
+++ b/modules/aws-backup-destination/iam.tf
@@ -4,100 +4,197 @@
 #############################################
 
 locals {
-    copy_recovery_role_name = coalesce(var.name_prefix, "") != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point"
+  copy_recovery_role_name = coalesce(var.name_prefix, "") != "" ? "${var.name_prefix}-copy-recovery-point" : "copy-recovery-point"
 }
 
 data "aws_iam_policy_document" "copy_recovery_point_assume" {
-    count = var.enable_cross_account_vault_access ? 1 : 0
+  count = var.enable_cross_account_vault_access ? 1 : 0
 
-    statement {
-        effect = "Allow"
-        principals {
-            type        = "AWS"
-            identifiers = ["arn:aws:iam::${var.source_account_id}:root"]
-        }
-        actions = ["sts:AssumeRole"]
-    }
+  statement {
+    effect = "Allow"
+    principals {
+      type        = "AWS"
+      identifiers = ["arn:aws:iam::${var.source_account_id}:root"]
+    }
+    actions = ["sts:AssumeRole"]
+  }
 
-    # Allow AWS Backup service to assume when executing StartCopyJob in this account
-    statement {
-        effect = "Allow"
-        principals {
-            type        = "Service"
-            identifiers = ["backup.amazonaws.com"]
-        }
-        actions = ["sts:AssumeRole"]
-    }
+  # Allow the AWS Backup and RDS services to assume this role (AWS Backup does so when executing StartCopyJob in this account)
+  statement {
+    effect = "Allow"
+    principals {
+      type = "Service"
+      identifiers = [
+        "backup.amazonaws.com",
+        "rds.amazonaws.com"
+      ]
+    }
+    actions = ["sts:AssumeRole"]
+  }
 }
 
 resource "aws_iam_role" "copy_recovery_point" {
-    count              = var.enable_cross_account_vault_access ? 1 : 0
-    name               = local.copy_recovery_role_name
-    assume_role_policy = data.aws_iam_policy_document.copy_recovery_point_assume[0].json
-    description        = "Role assumed by source account lambda to start and describe AWS Backup copy jobs, also passed to AWS Backup service for execution"
-    tags = {
-        ModuleComponent = "aws-backup-destination"
-        Purpose         = "copy-recovery-point-cross-account"
-    }
+  count              = var.enable_cross_account_vault_access ? 1 : 0
+  name               = local.copy_recovery_role_name
+  assume_role_policy = data.aws_iam_policy_document.copy_recovery_point_assume[0].json
+  description        = "Role assumed by source account lambda to start and describe AWS Backup copy jobs, also passed to AWS Backup service for execution"
+  tags = {
+    ModuleComponent = "aws-backup-destination"
+    Purpose         = "copy-recovery-point-cross-account"
+  }
 }
 
 data "aws_iam_policy_document" "copy_recovery_point_permissions" {
-    count = var.enable_cross_account_vault_access ? 1 : 0
+  count = var.enable_cross_account_vault_access ? 1 : 0
 
-    # Start copy job (resource-level supports recoveryPoint*)
-    statement {
-        effect = "Allow"
-        actions = [
-            "backup:StartCopyJob"
-        ]
-        # Recovery points originate from the source account; allow any recovery point ARN pattern for that account & any region used via var.region
-        resources = ["arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*"]
-    }
+  # Start copy job (resource-level supports recoveryPoint*)
+  statement {
+    effect = "Allow"
+    actions = [
+      "backup:StartCopyJob"
+    ]
+    # Recovery points originate from the source account; allow any recovery point ARN pattern for that account & any region used via var.region
+    resources = ["arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*"]
+  }
 
-    # Describe copy job (no resource-level restriction)
-    statement {
-        effect = "Allow"
-        actions = [
-            "backup:DescribeCopyJob"
+  # Backup vault/job management and describe/list actions (no resource-level restriction)
+  statement {
+    sid    = "BackupServicePermissions"
+    effect = "Allow"
+    actions = [
+      "backup:StartCopyJob",
+      "backup:CopyIntoBackupVault",
+      "backup:DescribeCopyJob",
+      "backup:DescribeBackupVault",
+      "backup:DescribeRecoveryPoint",
+      "backup:DescribeBackupJob",
+      "backup:GetBackupVaultAccessPolicy",
+      "backup:StopBackupJob",
+      "backup:ListRecoveryPointsByBackupVault",
+      "backup:ListCopyJobs",
+      "backup:GetRecoveryPointRestoreMetadata",
+      "backup:UpdateRecoveryPointLifecycle",
+      "backup:PutBackupVaultAccessPolicy",
+      "backup:ListRecoveryPointsByResource",
+      "backup:GetBackupPlan",
+      "backup:ListBackupJobs",
+      "backup:TagResource",
+      "backup:UntagResource",
+      "backup:ListTags",
+      "backup:ListBackupVaults",
+      "backup:CreateBackupVault",
+      "backup:GetBackupVaultNotifications",
+      "backup:PutBackupVaultNotifications",
+      "backup:DescribeProtectedResource",
+      "backup:ListProtectedResources"
     ]
-        resources = ["*"]
-    }
+    resources = ["*"]
+  }
 
-    statement {
-        effect = "Allow"
-        actions = [
+  statement {
+    sid    = "CopyBackupPermissions"
+    effect = "Allow"
+    actions = [
       "backup:CopyIntoBackupVault",
       "backup:CopyFromBackupVault"
     ]
-        resources = [
+    resources = [
       "arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*",
       "arn:aws:backup:${var.region}:${var.account_id}:backup-vault:${aws_backup_vault.vault.name}",
-      "arn:aws:backup:${var.region}:${var.source_account_id}:backup-vault:*"
+      "arn:aws:backup:${var.region}:${var.source_account_id}:backup-vault:*",
+      "arn:aws:rds:${var.region}:${var.account_id}:*",
+      "arn:aws:rds:${var.region}:${var.source_account_id}:*",
+      "arn:aws:s3:::*",
+      "arn:aws:s3:::*/*",
+      "arn:aws:dynamodb:${var.region}:${var.account_id}:table/*",
+      "arn:aws:dynamodb:${var.region}:${var.source_account_id}:table/*",
+      "arn:aws:ec2:${var.region}:${var.account_id}:volume/*",
+      "arn:aws:ec2:${var.region}:${var.source_account_id}:volume/*",
+      "arn:aws:ec2:${var.region}:${var.account_id}:snapshot/*",
+      "arn:aws:ec2:${var.region}:${var.source_account_id}:snapshot/*",
+      "arn:aws:efs:${var.region}:${var.account_id}:file-system/*",
+      "arn:aws:efs:${var.region}:${var.source_account_id}:file-system/*"
+    ]
+  }
+
+  statement {
+    sid    = "RDSPermissions"
+    effect = "Allow"
+    actions = [
+      "rds:CopyDBSnapshot",
+      "rds:DescribeDBSnapshots",
+      "rds:ModifyDBSnapshotAttribute",
+      "rds:DescribeDBInstances",
+      "rds:DescribeDBClusters",
+      "rds:CopyDBClusterSnapshot",
+      "rds:DescribeDBClusterSnapshots",
+      "rds:AddTagsToResource",
+      "rds:ListTagsForResource"
+    ]
+    resources = [
+      "arn:aws:rds:${var.region}:${var.account_id}:db:*",
+      "arn:aws:rds:${var.region}:${var.account_id}:snapshot:*",
+      "arn:aws:rds:${var.region}:${var.account_id}:cluster:*",
+      "arn:aws:rds:${var.region}:${var.account_id}:cluster-snapshot:*",
+      "arn:aws:rds:${var.region}:${var.source_account_id}:db:*",
+      "arn:aws:rds:${var.region}:${var.source_account_id}:snapshot:*",
+      "arn:aws:rds:${var.region}:${var.source_account_id}:cluster:*",
+      "arn:aws:rds:${var.region}:${var.source_account_id}:cluster-snapshot:*"
+    ]
+  }
+
+  statement {
+    sid    = "BackupTagPermissions"
+    effect = "Allow"
+    actions = [
+      "backup:TagResource"
+    ]
+    resources = [
+      "arn:aws:backup:${var.region}:${var.account_id}:recovery-point:*"
+    ]
+  }
+
+  statement {
+    sid    = "KMSPermissions"
+    effect = "Allow"
+    actions = [
+      "kms:Decrypt",
+      "kms:Encrypt",
+      "kms:ReEncrypt*",
+      "kms:GenerateDataKey*",
+      "kms:DescribeKey",
+      "kms:CreateGrant",
+      "kms:RetireGrant",
+      "kms:ListGrants"
+    ]
+    resources = [
+      "arn:aws:kms:${var.region}:${var.account_id}:key/*"
     ]
-    }
+  }
 
-    # Pass this role to AWS Backup service when invoking StartCopyJob with IamRoleArn
-    statement {
-        effect    = "Allow"
-        actions   = ["iam:PassRole"]
-        resources = [aws_iam_role.copy_recovery_point[0].arn]
-        condition {
-            test     = "StringEquals"
-            variable = "iam:PassedToService"
-            values   = ["backup.amazonaws.com"]
-        }
-    }
+  # Pass this role to AWS Backup service when invoking StartCopyJob with IamRoleArn
+  statement {
+    sid       = "IAMPermissions"
+    effect    = "Allow"
+    actions   = ["iam:PassRole"]
+    resources = [aws_iam_role.copy_recovery_point[0].arn]
+    condition {
+      test     = "StringEquals"
+      variable = "iam:PassedToService"
+      values   = ["backup.amazonaws.com"]
+    }
+  }
 }
 
 resource "aws_iam_role_policy" "copy_recovery_point_policy" {
-    count  = var.enable_cross_account_vault_access ? 1 : 0
-    name   = "${local.copy_recovery_role_name}-policy"
-    role   = aws_iam_role.copy_recovery_point[0].id
-    policy = data.aws_iam_policy_document.copy_recovery_point_permissions[0].json
+  count  = var.enable_cross_account_vault_access ? 1 : 0
+  name   = "${local.copy_recovery_role_name}-policy"
+  role   = aws_iam_role.copy_recovery_point[0].id
+  policy = data.aws_iam_policy_document.copy_recovery_point_permissions[0].json
 }
 
 output "copy_recovery_point_role_arn" {
-    description = "ARN of role to assume from source account lambda (set ASSUME_ROLE_ARN to this). Only present if enabled."
-    value       = try(aws_iam_role.copy_recovery_point[0].arn, null)
-    depends_on  = [aws_iam_role.copy_recovery_point]
+  description = "ARN of role to assume from source account lambda (set ASSUME_ROLE_ARN to this). Only present if enabled."
+  value       = try(aws_iam_role.copy_recovery_point[0].arn, null)
+  depends_on  = [aws_iam_role.copy_recovery_point]
 }
diff --git a/modules/aws-backup-destination/parameter_store_kms.tf b/modules/aws-backup-destination/parameter_store_kms.tf
index dc5e068..4497df5 100644
--- a/modules/aws-backup-destination/parameter_store_kms.tf
+++ b/modules/aws-backup-destination/parameter_store_kms.tf
@@ -30,6 +30,57 @@ data "aws_iam_policy_document" "kms_key_policy" {
       resources = ["*"]
     }
   }
+
+  # The role is referenced directly (no try(..., "") fallback): these blocks are
+  # only rendered when the role exists, and an empty principal is not valid.
+  dynamic "statement" {
+    for_each = var.enable_cross_account_vault_access ? ["allow_backup_key_ops"] : []
+
+    content {
+      sid    = "AllowCrossAccountBackupKeyOperations"
+      effect = "Allow"
+      principals {
+        type = "AWS"
+        identifiers = [
+          aws_iam_role.copy_recovery_point[0].arn,
+          "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup"
+        ]
+      }
+      actions = [
+        "kms:Encrypt",
+        "kms:Decrypt",
+        "kms:ReEncrypt*",
+        "kms:GenerateDataKey*",
+        "kms:DescribeKey"
+      ]
+      resources = ["*"]
+    }
+  }
+
+  dynamic "statement" {
+    for_each = var.enable_cross_account_vault_access ? ["allow_backup_grants"] : []
+
+    content {
+      sid    = "AllowCrossAccountBackupGrants"
+      effect = "Allow"
+      principals {
+        type = "AWS"
+        identifiers = [
+          aws_iam_role.copy_recovery_point[0].arn,
+          "arn:aws:iam::${var.source_account_id}:role/aws-service-role/backup.amazonaws.com/AWSServiceRoleForBackup"
+        ]
+      }
+      actions = [
+        "kms:CreateGrant"
+      ]
+      resources = ["*"]
+      condition {
+        test     = "Bool"
+        variable = "kms:GrantIsForAWSResource"
+        values   = ["true"]
+      }
+    }
+  }
 }
 
 resource "aws_kms_key" "parameter_store_key" {
diff --git a/modules/aws-backup-source/backup_vault_policy.tf b/modules/aws-backup-source/backup_vault_policy.tf
index 392394d..37c9302 100644
--- a/modules/aws-backup-source/backup_vault_policy.tf
+++ b/modules/aws-backup-source/backup_vault_policy.tf
@@ -40,7 +40,12 @@ data "aws_iam_policy_document" "vault_policy" {
 
       principals {
         type = "AWS"
-        identifiers = ["arn:aws:iam::${var.backup_copy_vault_account_id}:root"]
+        # NOTE(review): assumes the destination module was deployed with the same
+        # name_prefix and with enable_cross_account_vault_access = true - confirm.
+        identifiers = [
+          "arn:aws:iam::${var.backup_copy_vault_account_id}:root",
+          "arn:aws:iam::${var.backup_copy_vault_account_id}:role/${var.name_prefix}-copy-recovery-point"
+        ]
       }
     }
   }
diff --git a/modules/aws-backup-source/lambda_copy_recovery_point.tf b/modules/aws-backup-source/lambda_copy_recovery_point.tf
index 89cabbe..cf2e108 100644
--- a/modules/aws-backup-source/lambda_copy_recovery_point.tf
+++ b/modules/aws-backup-source/lambda_copy_recovery_point.tf
@@ -44,6 +44,36 @@ resource "aws_iam_policy" "iam_policy_for_lambda_copy_recovery_point" {
         Resource = "*"
         Effect   = "Allow"
       },
+      {
+        # Key usage is deliberately unconditioned: kms:GrantIsForAWSResource is
+        # only present on grant requests, so the Bool test never matched these
+        # actions and they were silently not granted.
+        Action = [
+          "kms:Decrypt",
+          "kms:Encrypt",
+          "kms:ReEncrypt*",
+          "kms:GenerateDataKey*",
+          "kms:DescribeKey"
+        ]
+        Resource = "*"
+        Effect   = "Allow"
+      },
+      {
+        # Grants may only be created on behalf of an integrated AWS service.
+        Action   = ["kms:CreateGrant"]
+        Resource = "*"
+        Effect   = "Allow"
+        Condition = {
+          Bool = { "kms:GrantIsForAWSResource" = "true" }
+        }
+      },
+      {
+        Action = [
+          "rds:DescribeDBSnapshots"
+        ]
+        Resource = "*"
+        Effect   = "Allow"
+      },
       {
         Action   = ["sts:AssumeRole"]
         Resource = var.lambda_copy_recovery_point_assume_role_arn == "" ? null : var.lambda_copy_recovery_point_assume_role_arn
diff --git a/modules/aws-backup-source/lambda_restore_to_rds.tf b/modules/aws-backup-source/lambda_restore_to_rds.tf
new file mode 100644
index 0000000..cba4f16
--- /dev/null
+++ b/modules/aws-backup-source/lambda_restore_to_rds.tf
@@ -0,0 +1,87 @@
+data "archive_file" "lambda_restore_to_rds_zip" {
+  count       = var.lambda_restore_to_rds_enable ? 1 : 0
+  type        = "zip"
+  source_dir  = "${path.module}/resources/restore-to-rds/"
+  output_path = "${path.module}/.terraform/archive_files/lambda_restore_to_rds.zip"
+  # Ship only the handler; keep the README and unit tests out of the package.
+  excludes    = ["README.md", "test_restore_to_rds.py"]
+}
+
+resource "aws_iam_role" "iam_for_lambda_restore_to_rds" {
+  count = var.lambda_restore_to_rds_enable ? 1 : 0
+  name  = "${var.name_prefix}-lambda-restore-to-rds-role"
+
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Action    = "sts:AssumeRole"
+      Effect    = "Allow"
+      Principal = { Service = "lambda.amazonaws.com" }
+    }]
+  })
+}
+
+resource "aws_iam_policy" "iam_policy_for_lambda_restore_to_rds" {
+  count = var.lambda_restore_to_rds_enable ? 1 : 0
+  name  = "${var.name_prefix}-lambda-restore-to-rds-policy"
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Action = [
+          "logs:CreateLogGroup",
+          "logs:CreateLogStream",
+          "logs:PutLogEvents"
+        ]
+        Resource = "arn:aws:logs:*:*:*"
+        Effect   = "Allow"
+      },
+      {
+        Action = [
+          "backup:StartRestoreJob",
+          "backup:DescribeRestoreJob"
+        ]
+        Resource = "*"
+        Effect   = "Allow"
+      },
+      {
+        Action   = "iam:PassRole"
+        Resource = aws_iam_role.backup.arn
+        Condition = {
+          StringEquals = {
+            "iam:PassedToService" : "backup.amazonaws.com"
+          }
+        }
+        Effect = "Allow"
+      }
+    ]
+  })
+}
+
+resource "aws_iam_role_policy_attachment" "lambda_restore_to_rds_policy_attach" {
+  count      = var.lambda_restore_to_rds_enable ? 1 : 0
+  role       = aws_iam_role.iam_for_lambda_restore_to_rds[0].name
+  policy_arn = aws_iam_policy.iam_policy_for_lambda_restore_to_rds[0].arn
+}
+
+resource "aws_lambda_function" "lambda_restore_to_rds" {
+  count            = var.lambda_restore_to_rds_enable ? 1 : 0
+  function_name    = "${var.name_prefix}_lambda-restore-to-rds"
+  role             = aws_iam_role.iam_for_lambda_restore_to_rds[0].arn
+  handler          = "restore_to_rds.lambda_handler"
+  runtime          = "python3.12"
+  filename         = data.archive_file.lambda_restore_to_rds_zip[0].output_path
+  source_code_hash = data.archive_file.lambda_restore_to_rds_zip[0].output_base64sha256
+  # 30 s of headroom beyond the polling window, where the limit allows, so the
+  # handler can return its 202 response; Lambda caps the timeout at 900 seconds.
+  timeout          = min(var.lambda_restore_to_rds_max_wait_minutes * 60 + 30, 900)
+
+  environment {
+    variables = {
+      POLL_INTERVAL_SECONDS = var.lambda_restore_to_rds_poll_interval_seconds
+      MAX_WAIT_MINUTES      = var.lambda_restore_to_rds_max_wait_minutes
+      IAM_ROLE_ARN          = aws_iam_role.backup.arn
+    }
+  }
+}
diff --git a/modules/aws-backup-source/resources/restore-to-rds/README.md b/modules/aws-backup-source/resources/restore-to-rds/README.md
new file mode 100644
index 0000000..51d97ca
--- /dev/null
+++ b/modules/aws-backup-source/resources/restore-to-rds/README.md
@@ -0,0 +1,78 @@
+# Lambda Restore to RDS
+
+Starts or monitors an AWS Backup restore of an RDS recovery point into a new
DB instance in the same account.
+
+Two modes:
+
+1. START: Provide the required identifiers to create a new restored instance.
+2. MONITOR: Provide an existing `restore_job_id` to poll until completion or timeout.
+
+## Event Contract
+
+START example:
+
+```json
+{
+  "recovery_point_arn": "arn:aws:backup:eu-west-2:123456789012:recovery-point:ABCDEF123456",
+  "db_instance_identifier": "restored-app-db"
+}
+```
+
+Optional fields:
+
+- `db_instance_class`
+- `db_subnet_group_name`
+- `vpc_security_group_ids`
+- `restore_metadata_overrides`
+- `copy_source_tags_to_restored_resource` (boolean)
+
+MONITOR mode:
+
+```json
+{ "restore_job_id": "1234abcd-job" }
+```
+
+## Environment Variables
+
+- `IAM_ROLE_ARN` – Backup service role (injected by Terraform)
+- `POLL_INTERVAL_SECONDS` – Delay between status checks, in seconds (default 30)
+- `MAX_WAIT_MINUTES` – Maximum wait before returning 202 (default 10)
+
+## Behaviour
+
+- Same-account enforcement (cross-account recovery points must be copied to a local vault first).
+- Supports optional copying of source backup tags.
+- Returns HTTP 200 (completed), 500 (failed/aborted), or 202 (still running after timeout).
+
+## CLI Examples
+
+Start:
+
+```bash
+AWS_PROFILE=<profile> aws lambda invoke \
+  --function-name <name_prefix>_lambda-restore-to-rds \
+  --cli-binary-format raw-in-base64-out \
+  --payload '{"recovery_point_arn":"<recovery-point-arn>","db_instance_identifier":"restored-db-1"}' \
+  rds_restore_start.json
+```
+
+Monitor:
+
+```bash
+AWS_PROFILE=<profile> aws lambda invoke \
+  --function-name <name_prefix>_lambda-restore-to-rds \
+  --cli-binary-format raw-in-base64-out \
+  --payload '{"restore_job_id":"<restore-job-id>"}' \
+  rds_restore_monitor.json
+```
+
+## Testing
+
+```bash
+python test_restore_to_rds.py
+```
+
+## Notes
+
+- Copy the recovery point to a local vault first for air-gapped workflows.
+- Use tag copying sparingly.
diff --git a/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py b/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py
new file mode 100644
index 0000000..ab1bfef
--- /dev/null
+++ b/modules/aws-backup-source/resources/restore-to-rds/restore_to_rds.py
@@ -0,0 +1,225 @@
+"""Lambda to start or monitor an AWS Backup RDS restore job.
+
+Modes:
+1. START: event supplies recovery_point_arn + db_instance_identifier (+ optional metadata) → starts restore.
+2. MONITOR: event supplies restore_job_id → polls until terminal state or timeout.
+
+Parallels restore_to_s3 implementation for consistency (env-driven IAM role, polling loop, unified response).
+"""
+import json
+import logging
+import os
+import time
+
+import boto3
+from botocore.exceptions import BotoCoreError, ClientError
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+backup_client = boto3.client('backup')
+sts_client = boto3.client('sts')
+
+FINAL_STATES = ('COMPLETED', 'FAILED', 'ABORTED')
+
+# Seconds held back from the invocation's remaining time so the handler can
+# still return a 202 response before the Lambda runtime stops it.
+TIMEOUT_SAFETY_MARGIN_SECONDS = 5
+
+
+def _error(status_code, message):
+    """Build the handler's standard error response."""
+    return {'statusCode': status_code, 'body': {'message': message}}
+
+
+def get_job_status(restore_job_id):
+    """Return ``(status, details)`` for a restore job.
+
+    API and unexpected errors are reported as status ``'FAILED'`` with the
+    error text in ``details['StatusMessage']``, which ends any polling loop.
+    """
+    try:
+        job_details = backup_client.describe_restore_job(RestoreJobId=restore_job_id)
+        return job_details['Status'], job_details
+    except ClientError as e:
+        api_message = e.response['Error']['Message']
+        logger.error("Error checking job status for %s: %s", restore_job_id, api_message)
+        return 'FAILED', {'StatusMessage': f"API Error during status check: {api_message}"}
+    except Exception as e:  # boundary: a status check must never crash the handler
+        logger.error("Unexpected error checking job status: %s", e)
+        return 'FAILED', {'StatusMessage': f"Unexpected error: {e}"}
+
+
+def wait_for_job(restore_job_id, wait_seconds, max_wait_minutes, time_budget_seconds=None):
+    """Poll a restore job until it is terminal or the wait budget is spent.
+
+    At least one status check is always made. ``time_budget_seconds``, when
+    given, caps the wait below ``max_wait_minutes`` (used to stay inside the
+    Lambda timeout). Returns the last ``(status, details)`` observed.
+
+    Raises:
+        ValueError: if ``wait_seconds`` is not positive.
+    """
+    if wait_seconds <= 0:
+        raise ValueError('wait_seconds must be positive')
+    budget = max(max_wait_minutes, 0) * 60
+    if time_budget_seconds is not None:
+        budget = min(budget, max(time_budget_seconds, 0))
+    deadline = time.monotonic() + budget
+    logger.info("Polling restore job %s for up to %s seconds...", restore_job_id, budget)
+    while True:
+        status, details = get_job_status(restore_job_id)
+        logger.info("Current Status: %s (%s complete)", status, details.get('PercentDone', '0.00%'))
+        if status in FINAL_STATES:
+            logger.info("Job reached final status: %s", status)
+            return status, details
+        # Give up once another sleep would overrun the budget.
+        if time.monotonic() + wait_seconds > deadline:
+            return status, details
+        logger.info("Waiting %s seconds before the next check...", wait_seconds)
+        time.sleep(wait_seconds)
+
+
+def _remaining_seconds(context):
+    """Return usable seconds left in this invocation, or None if unknown."""
+    get_remaining = getattr(context, 'get_remaining_time_in_millis', None)
+    if not callable(get_remaining):
+        return None
+    return get_remaining() / 1000 - TIMEOUT_SAFETY_MARGIN_SECONDS
+
+
+def _check_same_account(recovery_point_arn):
+    """Return a 400 response for a malformed or foreign-account ARN, else None."""
+    arn_parts = recovery_point_arn.split(':') if isinstance(recovery_point_arn, str) else []
+    if len(arn_parts) < 6 or arn_parts[0] != 'arn':
+        return _error(400, 'recovery_point_arn is not a valid ARN.')
+    rp_account_id = arn_parts[4]
+    try:
+        caller_account_id = sts_client.get_caller_identity()['Account']
+    except Exception as e:  # best effort, as before: an STS failure does not block the restore
+        logger.warning("Account validation skipped: %s", e)
+        return None
+    if rp_account_id == caller_account_id:
+        return None
+    return {
+        'statusCode': 400,
+        'body': {
+            'message': 'Recovery point account mismatch; copy to local vault via copy-recovery-point Lambda before RDS restore.',
+            'recovery_point_account': rp_account_id,
+            'lambda_account': caller_account_id
+        }
+    }
+
+
+def _build_metadata(event, db_instance_identifier, overrides):
+    """Assemble the string-to-string Metadata map passed to StartRestoreJob."""
+    metadata = {'DBInstanceIdentifier': db_instance_identifier}
+    if event.get('db_instance_class'):
+        metadata['DBInstanceClass'] = event['db_instance_class']
+    if event.get('db_subnet_group_name'):
+        metadata['DBSubnetGroupName'] = event['db_subnet_group_name']
+    security_groups = event.get('vpc_security_group_ids')
+    if security_groups:
+        if isinstance(security_groups, list):
+            security_groups = ','.join(map(str, security_groups))
+        metadata['VpcSecurityGroupIds'] = security_groups
+    metadata.update(overrides)  # caller-supplied overrides win
+    # Metadata values must be strings; JSON-encode anything else (true, 5, ...)
+    # so booleans and numbers from the event do not fail parameter validation.
+    return {str(k): v if isinstance(v, str) else json.dumps(v) for k, v in metadata.items()}
+
+
+def lambda_handler(event, context):
+    """Start a new RDS restore, or monitor an existing one, and report its status.
+
+    Returns a dict with ``statusCode`` 200 (completed), 202 (still running),
+    400 (bad input/config) or 500 (job failed or could not be started).
+    """
+    try:
+        wait_seconds = int(os.environ.get('POLL_INTERVAL_SECONDS', '30'))
+        max_wait_minutes = int(os.environ.get('MAX_WAIT_MINUTES', '10'))
+    except ValueError:
+        return _error(400, 'Config Error: POLL_INTERVAL_SECONDS or MAX_WAIT_MINUTES must be integers.')
+    if wait_seconds <= 0 or max_wait_minutes < 0:
+        # A zero interval previously raised ZeroDivisionError while polling.
+        return _error(400, 'Config Error: POLL_INTERVAL_SECONDS must be > 0 and MAX_WAIT_MINUTES must be >= 0.')
+
+    event = event or {}
+    restore_job_id = event.get('restore_job_id')
+    if restore_job_id:
+        logger.info("Mode: MONITOR - Tracking existing restore job: %s", restore_job_id)
+    else:
+        logger.info("Mode: START - Initiating new RDS restore job. Event: %s", event)
+        restore_job_id, failure = _start_restore(event, context)
+        if failure:
+            return failure
+
+    final_status, final_details = wait_for_job(
+        restore_job_id, wait_seconds, max_wait_minutes, _remaining_seconds(context))
+    return _format_response(restore_job_id, final_status, final_details, max_wait_minutes)
+
+
+def _start_restore(event, context):
+    """Validate a START event and start the job; return ``(job_id, error_response)``."""
+    recovery_point_arn = event.get('recovery_point_arn')
+    db_instance_identifier = event.get('db_instance_identifier')
+    iam_role_arn = os.environ.get('IAM_ROLE_ARN')
+    if not (recovery_point_arn and db_instance_identifier):
+        return None, _error(400, 'Missing required parameters: recovery_point_arn, db_instance_identifier.')
+    if not iam_role_arn:
+        return None, _error(500, 'Configuration error: IAM_ROLE_ARN environment variable not set.')
+    # A JSON null override previously crashed dict.update with TypeError.
+    overrides = event.get('restore_metadata_overrides') or {}
+    if not isinstance(overrides, dict):
+        return None, _error(400, 'restore_metadata_overrides must be a JSON object.')
+    account_error = _check_same_account(recovery_point_arn)
+    if account_error:
+        return None, account_error
+
+    start_args = {
+        'RecoveryPointArn': recovery_point_arn,
+        'Metadata': _build_metadata(event, db_instance_identifier, overrides),
+        'IamRoleArn': iam_role_arn,
+        'IdempotencyToken': context.aws_request_id,
+        'ResourceType': 'RDS'
+    }
+    if event.get('copy_source_tags_to_restored_resource') is True:
+        start_args['CopySourceTagsToRestoredResource'] = True
+    try:
+        restore_job_id = backup_client.start_restore_job(**start_args)['RestoreJobId']
+    except (ClientError, BotoCoreError) as e:
+        # BotoCoreError covers client-side failures such as parameter validation.
+        detail = e.response['Error']['Message'] if isinstance(e, ClientError) else str(e)
+        error_message = f"Failed to start RDS restore job: {detail}"
+        logger.error(error_message, exc_info=True)
+        return None, _error(500, error_message)
+    logger.info("Started RDS restore job: %s", restore_job_id)
+    return restore_job_id, None
+
+
+def _format_response(restore_job_id, final_status, final_details, max_wait_minutes):
+    """Map the last observed job status to the handler's HTTP-style response."""
+    if final_status == 'COMPLETED':
+        status_code = 200
+        message = 'Restore job completed successfully.'
+    elif final_status in ('FAILED', 'ABORTED'):
+        status_code = 500
+        message = f'Restore job failed/aborted. Message: {final_details.get("StatusMessage", "N/A")}'
+    else:
+        status_code = 202
+        message = f'Restore job still running after max wait ({max_wait_minutes} mins). Final check status: {final_status}.'
+    completion_raw = final_details.get('CompletionDate', 'N/A')
+    # Datetime-like values are rendered as ISO 8601; anything else is stringified as-is.
+    if hasattr(completion_raw, 'isoformat'):
+        completion_formatted = completion_raw.isoformat()
+    else:
+        completion_formatted = str(completion_raw)
+    return {
+        'statusCode': status_code,
+        'body': {
+            'message': message,
+            'restoreJobId': restore_job_id,
+            'finalStatus': final_status,
+            'completionDate': completion_formatted
+        }
+    }
diff --git a/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py b/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py
new file mode 100644
index 0000000..c429b35
--- /dev/null
+++ b/modules/aws-backup-source/resources/restore-to-rds/test_restore_to_rds.py
@@ -0,0 +1,116 @@
+import os
+import unittest
+from datetime import datetime, timezone
+from unittest.mock import patch
+
+# boto3 clients are created when restore_to_rds is imported; give them a
+# region so the import works on machines without AWS configuration.
+os.environ.setdefault('AWS_DEFAULT_REGION', 'eu-west-2')
+
+import restore_to_rds as rds  # noqa: E402
+
+
+def make_context(request_id):
+    """Return a minimal stand-in for the Lambda context object."""
+    return type('ctx', (), {'aws_request_id': request_id})
+
+
+class TestRestoreToRDS(unittest.TestCase):
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '1',
+        'MAX_WAIT_MINUTES': '0'
+    })
+    @patch.object(rds, 'backup_client')
+    @patch.object(rds, 'sts_client')
+    def test_start_restore_success(self, mock_sts, mock_backup):
+        mock_sts.get_caller_identity.return_value = {'Account': '123456789012'}
+        mock_backup.describe_restore_job.return_value = {
+            'Status': 'COMPLETED',
+            'PercentDone': '100.00%',
+            'CompletionDate': datetime.now(timezone.utc)
+        }
+        mock_backup.start_restore_job.return_value = {'RestoreJobId': 'job-123'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        resp = rds.lambda_handler(event, make_context('req-1'))
+        self.assertEqual(resp['statusCode'], 200)
+        self.assertEqual(resp['body']['restoreJobId'], 'job-123')
+        mock_backup.start_restore_job.assert_called_once()
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole'
+    })
+    def test_missing_required_params(self):
+        event = {'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC'}
+        resp = rds.lambda_handler(event, make_context('req-2'))
+        self.assertEqual(resp['statusCode'], 400)
+
+    # clear=True: an IAM_ROLE_ARN exported in the developer's shell must not
+    # leak into the "variable missing" scenario.
+    @patch.dict(os.environ, {
+        'POLL_INTERVAL_SECONDS': '30',
+        'MAX_WAIT_MINUTES': '10'
+    }, clear=True)
+    def test_missing_iam_role_env(self):
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        resp = rds.lambda_handler(event, make_context('req-3'))
+        self.assertEqual(resp['statusCode'], 500)
+        self.assertIn('IAM_ROLE_ARN', resp['body']['message'])
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole'
+    })
+    @patch.object(rds, 'sts_client')
+    def test_cross_account_blocked(self, mock_sts):
+        mock_sts.get_caller_identity.return_value = {'Account': '999999999999'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:ABC',
+            'db_instance_identifier': 'restored-db'
+        }
+        resp = rds.lambda_handler(event, make_context('req-4'))
+        self.assertEqual(resp['statusCode'], 400)
+        self.assertIn('recovery_point_account', resp['body'])
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '1',
+        'MAX_WAIT_MINUTES': '0'
+    })
+    @patch.object(rds, 'backup_client')
+    @patch.object(rds, 'sts_client')
+    def test_copy_source_tags_flag(self, mock_sts, mock_backup):
+        mock_sts.get_caller_identity.return_value = {'Account': '123456789012'}
+        mock_backup.describe_restore_job.return_value = {
+            'Status': 'COMPLETED',
+            'PercentDone': '100.00%',
+            'CompletionDate': datetime.now(timezone.utc)
+        }
+        mock_backup.start_restore_job.return_value = {'RestoreJobId': 'job-456'}
+        event = {
+            'recovery_point_arn': 'arn:aws:backup:eu-west-2:123456789012:recovery-point:DEF',
+            'db_instance_identifier': 'restored-db-2',
+            'copy_source_tags_to_restored_resource': True
+        }
+        resp = rds.lambda_handler(event, make_context('req-5'))
+        self.assertEqual(resp['statusCode'], 200)
+        called_args = mock_backup.start_restore_job.call_args[1]
+        self.assertTrue(called_args.get('CopySourceTagsToRestoredResource'))
+
+    @patch.dict(os.environ, {
+        'IAM_ROLE_ARN': 'arn:aws:iam::123456789012:role/BackupRole',
+        'POLL_INTERVAL_SECONDS': '0'
+    })
+    def test_zero_poll_interval_rejected(self):
+        resp = rds.lambda_handler({'restore_job_id': 'job-789'}, make_context('req-6'))
+        self.assertEqual(resp['statusCode'], 400)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/modules/aws-backup-source/variables.tf b/modules/aws-backup-source/variables.tf
index 6ca9252..06155f7 100644
--- a/modules/aws-backup-source/variables.tf
+++ b/modules/aws-backup-source/variables.tf
@@ -1,3 +1,31 @@
+variable "lambda_restore_to_rds_enable" {
+  description = "Flag to enable the restore-to-rds lambda."
+  type        = bool
+  default     = false
+}
+
+variable "lambda_restore_to_rds_poll_interval_seconds" {
+  description = "Polling interval in seconds for RDS restore job status checks."
+  type        = number
+  default     = 30
+
+  validation {
+    condition     = var.lambda_restore_to_rds_poll_interval_seconds >= 1
+    error_message = "The polling interval must be at least 1 second."
+  }
+}
+
+variable "lambda_restore_to_rds_max_wait_minutes" {
+  description = "Maximum number of minutes to wait for an RDS restore job to reach a terminal state before returning running status."
+  type        = number
+  default     = 10
+
+  validation {
+    condition     = var.lambda_restore_to_rds_max_wait_minutes >= 1 && var.lambda_restore_to_rds_max_wait_minutes <= 15
+    error_message = "The maximum wait must be between 1 and 15 minutes (the Lambda timeout limit is 900 seconds)."
+  }
+}
+
 variable "project_name" {
   description = "The name of the project this relates to."
   type        = string