From a60dec3d61c8b9a4f3c6ef74f67acc5b91c2391c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radoslav=20Bod=C3=B3?= <bodik@cesnet.cz> Date: Tue, 2 Apr 2024 17:23:38 +0200 Subject: [PATCH] rwm: add storage_drop_versions --- Makefile | 2 +- README.md | 19 +++++++---- rwm.py | 77 ++++++++++++++++++++++++++++++++----------- tests/test_default.py | 2 ++ tests/test_rwm.py | 26 ++++++++------- tests/test_storage.py | 26 +++++++++++++++ 6 files changed, 112 insertions(+), 40 deletions(-) diff --git a/Makefile b/Makefile index 03ca01a..90b98a8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: coverage lint install: - apt-get -y install awscli python3-cryptography python3-tabulate rclone restic yamllint + apt-get -y install awscli python3-boto3 python3-cryptography python3-tabulate rclone restic yamllint install-dev: apt-get -y install python3-venv snapd diff --git a/README.md b/README.md index 41c4401..30e182a 100644 --- a/README.md +++ b/README.md @@ -26,23 +26,25 @@ versions of the bucket objects reclaiming free space on the underlying storage. must be delegated to secure element residing outside of attacker's reach and would use privileged credentials for the managed bucket. -RWM can: + +## Features * low-level S3 access for aws cli and rclone * rclone "crypt over S3" backend * restic with S3 repository -* configurable backup manager/executor - -* create, delete and list policed storage buckets -* check if used bucket is configured with expected policies +* simple backup manager/executor +* storage manager + * create, delete and list policed storage buckets + * check if used bucket is configured with expected policies + * drop all versions to reclaim storage space TODO: * generate and store current bucket state state-data * recreate bucket contents on local filesystem (or remote bucket) acording to specified state data * ??? 
check completeness of the current state of the bucket -* prune all non-recent object versions to reclaim storage space -* unlike in other backup solutions, attacker with credentials can restore any old data from the repository/bucket +* unlike in other backup solutions, attacker with credentials can restore + old data from the repository/bucket, this should be discussed (howto threat modeling ?) ## Usage @@ -87,6 +89,9 @@ rwm storage_check_policy bucket1 rwm backup_all rwm restic snapshots rwm restic mount /mnt/restore + +# if current storage state is consistent, one can drop old object versions from time to time to reclaim storage space +rwm --config admin.conf storage_drop_versions bucket1 ``` diff --git a/rwm.py b/rwm.py index a04cd64..7409edb 100755 --- a/rwm.py +++ b/rwm.py @@ -198,6 +198,7 @@ class StorageManager: bucket.objects.all().delete() bucket.object_versions.all().delete() bucket.delete() + return 0 @staticmethod def _policy_statements_admin(policy): @@ -255,6 +256,24 @@ class StorageManager: return output + def storage_drop_versions(self, bucket_name): + """deletes all old versions and delete markers from storage to reclaim space""" + + # ? 
lock repo + + # drop all active object versions + object_versions = self.s3.meta.client.list_object_versions(Bucket=bucket_name) + for item in object_versions["Versions"]: + if not item["IsLatest"]: + self.s3.ObjectVersion(bucket_name, item["Key"], item["VersionId"]).delete() + + # drop all delete markers + object_versions = self.s3.meta.client.list_object_versions(Bucket=bucket_name) + for item in object_versions["DeleteMarkers"]: + self.s3.ObjectVersion(bucket_name, item["Key"], item["VersionId"]).delete() + + return 0 + class RWM: """rwm impl""" @@ -443,6 +462,11 @@ class RWM: )) return 0 + def storage_drop_versions_cmd(self, bucket_name): + """storage_drop_versions command""" + + return self.storage_manager.storage_drop_versions(bucket_name) + def configure_logging(debug): """configure logger""" @@ -466,34 +490,45 @@ def parse_arguments(argv): parser.add_argument("--config", default="rwm.conf") subparsers = parser.add_subparsers(title="commands", dest="command", required=False) + subparsers.add_parser("version", help="show version") - aws_cmd_parser = subparsers.add_parser("aws", help="aws command") + aws_cmd_parser = subparsers.add_parser("aws", help="run aws cli") aws_cmd_parser.add_argument("cmd_args", nargs="*") - rc_cmd_parser = subparsers.add_parser("rclone", help="rclone command") - rc_cmd_parser.add_argument("cmd_args", nargs="*") - rcc_cmd_parser = subparsers.add_parser("rclone_crypt", help="rclone command with crypt overlay") - rcc_cmd_parser.add_argument("cmd_args", nargs="*") - res_cmd_parser = subparsers.add_parser("restic", help="restic command") - res_cmd_parser.add_argument("cmd_args", nargs="*") - - backup_cmd_parser = subparsers.add_parser("backup", help="backup command") - backup_cmd_parser.add_argument("name", help="backup config name") - _ = subparsers.add_parser("backup_all", help="backup all command") - - storage_create_cmd_parser = subparsers.add_parser("storage_create", help="storage_create command") + rclone_cmd_parser = 
subparsers.add_parser("rclone", help="run rclone") + rclone_cmd_parser.add_argument("cmd_args", nargs="*") + rclone_crypt_cmd_parser = subparsers.add_parser("rclone_crypt", help="run rclone with crypt overlay") + rclone_crypt_cmd_parser.add_argument("cmd_args", nargs="*") + restic_cmd_parser = subparsers.add_parser("restic", help="run restic") + restic_cmd_parser.add_argument("cmd_args", nargs="*") + + backup_cmd_parser = subparsers.add_parser("backup", help="perform backup") + backup_cmd_parser.add_argument("name", help="backup name") + + _ = subparsers.add_parser("backup_all", help="run all backups in config") + + storage_create_cmd_parser = subparsers.add_parser("storage_create", help="create policed storage bucked") storage_create_cmd_parser.add_argument("bucket_name", help="bucket name") - storage_create_cmd_parser.add_argument("target_username", help="actual bucket user with limited RW access") - storage_delete_cmd_parser = subparsers.add_parser("storage_delete", help="storage_delete command") + storage_create_cmd_parser.add_argument("target_username", help="user to be granted limited RW access") + + storage_delete_cmd_parser = subparsers.add_parser("storage_delete", help="delete storage") storage_delete_cmd_parser.add_argument("bucket_name", help="bucket name") - storage_check_policy_cmd_parser = subparsers.add_parser("storage_check_policy", help="storage_check_policy command; use --debug to show policy") + + storage_check_policy_cmd_parser = subparsers.add_parser("storage_check_policy", help="check bucket policies; use --debug to show policy") storage_check_policy_cmd_parser.add_argument("bucket_name", help="bucket name") - _ = subparsers.add_parser("storage_list", help="storage_list command") + + _ = subparsers.add_parser("storage_list", help="list storages") + + storage_drop_versions_cmd_parser = subparsers.add_parser( + "storage_drop_versions", + help="reclaim storage space; drops any old object versions from bucket" + ) + 
storage_drop_versions_cmd_parser.add_argument("bucket_name", help="bucket name") return parser.parse_args(argv) -def main(argv=None): +def main(argv=None): # pylint: disable=too-many-branches """main""" args = parse_arguments(argv) @@ -505,12 +540,12 @@ def main(argv=None): logger.debug("config, %s", config) # assert config ? rwmi = RWM(config) + ret = -1 if args.command == "version": print(__version__) - return 0 + ret = 0 - ret = -1 if args.command == "aws": ret = wrap_output(rwmi.aws_cmd(args.cmd_args)) if args.command == "rclone": @@ -535,6 +570,8 @@ def main(argv=None): ret = rwmi.storage_check_policy_cmd(args.bucket_name) if args.command == "storage_list": ret = rwmi.storage_list_cmd() + if args.command == "storage_drop_versions": + ret = rwmi.storage_drop_versions_cmd(args.bucket_name) logger.debug("rwm finished with %s (ret %d)", "success" if ret == 0 else "errors", ret) return ret diff --git a/tests/test_default.py b/tests/test_default.py index f4e8e4f..98546ef 100644 --- a/tests/test_default.py +++ b/tests/test_default.py @@ -56,3 +56,5 @@ def test_main(tmpworkdir: str): # pylint: disable=unused-argument assert rwm_main(["storage_check_policy", "bucket"]) == 0 with patch.object(rwm.RWM, "storage_list_cmd", mock_ok): assert rwm_main(["storage_list"]) == 0 + with patch.object(rwm.RWM, "storage_drop_versions_cmd", mock_ok): + assert rwm_main(["storage_drop_versions", "bucket"]) == 0 diff --git a/tests/test_rwm.py b/tests/test_rwm.py index 6da1bfa..d8c22e5 100644 --- a/tests/test_rwm.py +++ b/tests/test_rwm.py @@ -273,29 +273,31 @@ def test_storage_delete_cmd(tmpworkdir: str, microceph: str, radosuser_admin: rw assert trwm.storage_delete_cmd(bucket_name) == 1 -def test_storage_check_policy_cmd(tmpworkdir: str, microceph: str, radosuser_admin: rwm.StorageManager): # pylint: disable=unused-argument +def test_storage_check_policy_cmd(tmpworkdir: str): # pylint: disable=unused-argument """test storage check policy command""" - trwm = rwm.RWM({ - 
"rwm_s3_endpoint_url": radosuser_admin.url, - "rwm_s3_access_key": radosuser_admin.access_key, - "rwm_s3_secret_key": radosuser_admin.secret_key, - }) + trwm = rwm.RWM({}) mock = Mock(return_value=False) with patch.object(rwm.StorageManager, "storage_check_policy", mock): assert trwm.storage_check_policy_cmd("dummy") == 1 -def test_storage_list_cmd(tmpworkdir: str, microceph: str, radosuser_admin: rwm.StorageManager): # pylint: disable=unused-argument +def test_storage_list_cmd(tmpworkdir: str): # pylint: disable=unused-argument """test storage check policy command""" - trwm = rwm.RWM({ - "rwm_s3_endpoint_url": radosuser_admin.url, - "rwm_s3_access_key": radosuser_admin.access_key, - "rwm_s3_secret_key": radosuser_admin.secret_key, - }) + trwm = rwm.RWM({}) mock = Mock(return_value=[]) with patch.object(rwm.StorageManager, "storage_list", mock): assert trwm.storage_list_cmd() == 0 + + +def test_storage_drop_versions_cmd(tmpworkdir: str): # pylint: disable=unused-argument + """test storage drop versions command""" + + trwm = rwm.RWM({}) + + mock = Mock(return_value=0) + with patch.object(rwm.StorageManager, "storage_drop_versions", mock): + assert trwm.storage_drop_versions_cmd("dummy") == 0 diff --git a/tests/test_storage.py b/tests/test_storage.py index 7121153..061da3e 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -159,3 +159,29 @@ def test_storage_list( radosuser_admin.bucket_create("no-acl-dummy") radosuser_admin.storage_create(bucket_name, target_username) assert radosuser_admin.storage_list() + + +def test_storage_drop_versions(tmpworkdir: str, microceph: str, radosuser_admin: rwm.StorageManager): # pylint: disable=unused-argument + """test manager storage_drop_versions""" + + bucket_name = "testbuckx" + target_username = "test1" + bucket = radosuser_admin.storage_create(bucket_name, target_username) + + bucket.upload_fileobj(BytesIO(b"dummydata1"), "dummykey") + bucket.upload_fileobj(BytesIO(b"dummydata2"), "dummykey") + 
bucket.Object("dummykey").delete() + bucket.upload_fileobj(BytesIO(b"dummydata3"), "dummykey") + + # boto3 resource api + object_versions = list(bucket.object_versions.all()) + assert len(object_versions) == 4 + # boto3 client api + object_versions = radosuser_admin.s3.meta.client.list_object_versions(Bucket=bucket.name) + assert len(object_versions["Versions"]) == 3 + assert len(object_versions["DeleteMarkers"]) == 1 + + assert radosuser_admin.storage_drop_versions(bucket.name) == 0 + + object_versions = list(bucket.object_versions.all()) + assert len(object_versions) == 1 -- GitLab