diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..6e8ce052d9fd56e85d1108b529b31621d8bb267a --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 150 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e3b95ea9ca809f2935a44cad89176fc7d20b30c4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +.coverage +.vscode/ +rwm.conf +testfile* +venv/ \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..cd01475054c5d0182557154387bc8134b3e07f31 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,9 @@ +[FORMAT] +max-line-length=150 + +[TYPECHECK] +ignored-classes=SQLAlchemy, sqlalchemy.orm.scoping.scoped_session + +[SIMILARITIES] +min-similarity-lines=8 +ignore-imports=yes \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..c4f70cacb41f82dc53fc06c81df88b19fb6938f8 --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +all: lint + +install: + apt-get -y install awscli make rclone yamllint + +venv: + apt-get -y install python3-venv + python3 -m venv venv + venv/bin/pip install -U pip + venv/bin/pip install -r requirements.lock + +freeze: + @pip freeze | grep -v '^pkg[-_]resources=' + +lint: lint-py lint-yaml + +lint-py: + python3 -m flake8 rwm.py tests + python3 -m pylint rwm.py tests + +lint-yaml: + yamllint --strict . 
+ +test: + python3 -m pytest -v tests/ + +coverage: + coverage run --source rwm -m pytest tests/ -x -vv + coverage report --show-missing --fail-under 100 diff --git a/README.md b/README.md index d4245f974ce68247a103f2177dfcd911a4400e48..8615f2ee11a55ffbd3e6ee6b93d49156953d9859 100644 --- a/README.md +++ b/README.md @@ -44,3 +44,39 @@ RWM can: TBD: * unlike in other backup solutions, attacker with credentials can restore any old data from the repository/bucket * number of object files vs size + + +## Install + +``` +git clone git@gitlab.flab.cesnet.cz:bodik/rwm.git /opt/rwm +cd /opt/rwm +make venv +make install +``` + + +## simple copy: rclone with crypt overlay + +* s3 + crypt overlay + +``` +cp rwm.conf.example rwm.conf +edit rwm.conf +rwm rcc sync /data rwmbe:/ +rwm rcc lsl rwmbe:/ +``` + +### Notes + +* correct, fails to download corrupted files +``` +root@bacula-test:/opt/rwm# ./rwm.py rcc copy rwmbe:/testfile.txt . +2024/03/23 16:54:31 ERROR : testfile.txt: Failed to copy: failed to open source object: not an encrypted file - bad magic string +2024/03/23 16:54:31 ERROR : Attempt 1/3 failed with 1 errors and: failed to open source object: not an encrypted file - bad magic string +``` + +* correct, skips bad filenames +``` +2024/03/23 16:53:56 DEBUG : 6p78fe3tlp5o7ngi241jsjl2qX: Skipping undecryptable file name: illegal base32 data at input byte 25 +``` \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 0000000000000000000000000000000000000000..54ee3a7939fd01ccb076f6f6337418899c407a4a --- /dev/null +++ b/requirements.lock @@ -0,0 +1,80 @@ +annotated-types==0.6.0 +astroid==3.1.0 +attrs==23.2.0 +aws-sam-translator==1.86.0 +aws-xray-sdk==2.13.0 +blinker==1.7.0 +boto3==1.34.69 +botocore==1.34.69 +certifi==2024.2.2 +cffi==1.16.0 +cfn-lint==0.86.1 
+charset-normalizer==3.3.2 +click==8.1.7 +coverage==7.4.4 +cryptography==42.0.5 +dill==0.3.8 +docker==7.0.0 +flake8==7.0.0 +Flask==3.0.2 +Flask-Cors==4.0.0 +graphql-core==3.2.3 +idna==3.6 +iniconfig==2.0.0 +isort==5.13.2 +itsdangerous==2.1.2 +Jinja2==3.1.3 +jmespath==1.0.1 +joserfc==0.9.0 +jschema-to-python==1.2.3 +jsondiff==2.0.0 +jsonpatch==1.33 +jsonpickle==3.0.3 +jsonpointer==2.4 +jsonschema==4.21.1 +jsonschema-path==0.3.2 +jsonschema-specifications==2023.12.1 +junit-xml==1.9 +lazy-object-proxy==1.10.0 +MarkupSafe==2.1.5 +mccabe==0.7.0 +moto==5.0.3 +mpmath==1.3.0 +networkx==3.2.1 +openapi-schema-validator==0.6.2 +openapi-spec-validator==0.7.1 +packaging==24.0 +pathable==0.4.3 +pbr==6.0.0 +platformdirs==4.2.0 +pluggy==1.4.0 +psutil==5.9.8 +py-partiql-parser==0.5.1 +pycodestyle==2.11.1 +pycparser==2.21 +pycryptodome==3.20.0 +pydantic==2.6.4 +pydantic_core==2.16.3 +pyflakes==3.2.0 +pylint==3.1.0 +pyparsing==3.1.2 +pytest==8.1.1 +pytest-xprocess==1.0.0 +python-dateutil==2.9.0.post0 +PyYAML==6.0.1 +referencing==0.31.1 +regex==2023.12.25 +requests==2.31.0 +responses==0.25.0 +rfc3339-validator==0.1.4 +rpds-py==0.18.0 +s3transfer==0.10.1 +sarif-om==1.0.4 +six==1.16.0 +sympy==1.12 +tomlkit==0.12.4 +typing_extensions==4.10.0 +urllib3==2.2.1 +Werkzeug==3.0.1 +wrapt==1.16.0 +xmltodict==0.13.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..0020f1cb406d3ed215187378a693c0e980fad4c3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +# runtime +pycryptodome + +# dev +flake8 +coverage +moto[server] +pytest +pytest-xprocess +pylint +pytest diff --git a/rwm.conf.example b/rwm.conf.example new file mode 100644 index 0000000000000000000000000000000000000000..06b9654719bf3dde1882825a04a3ef4e1e93e6f6 --- /dev/null +++ b/rwm.conf.example @@ -0,0 +1,6 @@ +S3_ENDPOINT_URL: "" +S3_ACCESS_KEY: "" +S3_SECRET_KEY: "" + +RCC_CRYPT_BUCKET: "rwmcrypt" +RCC_CRYPT_PASSWORD: "" diff --git a/rwm.py b/rwm.py new file mode 
100755 index 0000000000000000000000000000000000000000..5e7cf77c8bce7ca4c32eb1f8654aec153fe8715e --- /dev/null +++ b/rwm.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +"""rwm, restic/s3 worm manager""" + +import base64 +import logging +import os +import sys +from argparse import ArgumentParser +from pathlib import Path +from subprocess import run as subrun + +import yaml +from Crypto.Cipher import AES +from Crypto.Random import get_random_bytes + + +def is_sublist(needle, haystack): + """Check if needle is a sublist of haystack using list slicing and equality comparison""" + + # If needle is empty, it's considered a sublist of any list + if not needle: + return True + return any(haystack[i:i+len(needle)] == needle for i in range(len(haystack))) + + +def get_config(path): + """load config""" + + if Path(path).exists(): + return yaml.safe_load(Path(path).read_text(encoding='utf-8')) or {} + return {} + + +def rclone_obscure_password(plaintext, iv=None): + """rclone obscure password algorithm""" + + # https://github.com/rclone/rclone/blob/master/fs/config/obscure/obscure.go + # https://github.com/maaaaz/rclonedeobscure + secret_key = b"\x9c\x93\x5b\x48\x73\x0a\x55\x4d\x6b\xfd\x7c\x63\xc8\x86\xa9\x2b\xd3\x90\x19\x8e\xb8\x12\x8a\xfb\xf4\xde\x16\x2b\x8b\x95\xf6\x38" + if not iv: + iv = get_random_bytes(AES.block_size) + cipher = AES.new(key=secret_key, mode=AES.MODE_CTR, initial_value=iv, nonce=b'') + data = iv + cipher.encrypt(plaintext.encode()) + return base64.urlsafe_b64encode(data).decode().rstrip("=") + + +class RWM: + """rwm impl""" + + def __init__(self, config): + self.config = config + + def aws_cmd(self, args): + """aws cli wrapper""" + + env = { + "PATH": os.environ["PATH"], + "AWS_METADATA_SERVICE_NUM_ATTEMPTS": "0", + "AWS_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"], + "AWS_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"] + } + if is_sublist(["s3", "mb"], args): + # region must be set and empty for awscil >=2.x and ?du? 
ceph s3 + env.update({"AWS_DEFAULT_REGION": ""}) + + # aws cli does not have endpoint-url as env config option + return subrun(["aws", "--endpoint-url", self.config["S3_ENDPOINT_URL"]] + args, env=env, check=False).returncode + + def rclone_cmd(self, args): + """rclone wrapper""" + + # ensure command uses expected backend + if (not any(x.startswith("rwmbe:") for x in args)) and all(x != "help" for x in args): + logging.error("rclone command missing 'rwmbe:' backend specification") + return 1 + + env = { + "RCLONE_CONFIG": "", + "RCLONE_CONFIG_RWMBE_TYPE": "s3", + "RCLONE_CONFIG_RWMBE_ENDPOINT": self.config["S3_ENDPOINT_URL"], + "RCLONE_CONFIG_RWMBE_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"], + "RCLONE_CONFIG_RWMBE_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"], + "RCLONE_CONFIG_RWMBE_PROVIDER": "Ceph", + "RCLONE_CONFIG_RWMBE_ENV_AUTH": "false", + "RCLONE_CONFIG_RWMBE_REGION": "", + } + return subrun(["rclone"] + args, env=env, check=False).returncode + + def rclone_crypt_cmd(self, args): + """ + rclone crypt wrapper + * https://rclone.org/docs/#config-file + * https://rclone.org/crypt/ + """ + + # ensure command uses expected backend + if not any(x.startswith("rwmbe:") for x in args): + logging.error("rclone command missing 'rwmbe:' backend specification") + return 1 + + env = { + "RCLONE_CONFIG": "", + "RCLONE_CONFIG_RWMBE_TYPE": "crypt", + "RCLONE_CONFIG_RWMBE_REMOTE": f"rwms3be:/{self.config['RCC_CRYPT_BUCKET']}", + "RCLONE_CONFIG_RWMBE_PASSWORD": rclone_obscure_password(self.config["RCC_CRYPT_PASSWORD"]), + "RCLONE_CONFIG_RWMBE_PASSWORD2": rclone_obscure_password(self.config["RCC_CRYPT_PASSWORD"]), + + "RCLONE_CONFIG_RWMS3BE_TYPE": "s3", + "RCLONE_CONFIG_RWMS3BE_ENDPOINT": self.config["S3_ENDPOINT_URL"], + "RCLONE_CONFIG_RWMS3BE_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"], + "RCLONE_CONFIG_RWMS3BE_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"], + "RCLONE_CONFIG_RWMS3BE_PROVIDER": "Ceph", + "RCLONE_CONFIG_RWMS3BE_ENV_AUTH": "false", + 
"RCLONE_CONFIG_RWMS3BE_REGION": "", + } + return subrun(["rclone"] + args, env=env, check=False).returncode + + +def main(argv=None, dict_config=None): + """main""" + + parser = ArgumentParser(description="restics3 worm manager") + parser.add_argument("--config", default="rwm.conf") + + subparsers = parser.add_subparsers(title="commands", dest="command", required=False) + aws_cmd_parser = subparsers.add_parser("aws", help="aws command") + aws_cmd_parser.add_argument("cmd_args", nargs="*") + rc_cmd_parser = subparsers.add_parser("rc", help="rclone command") + rc_cmd_parser.add_argument("cmd_args", nargs="*") + rcc_cmd_parser = subparsers.add_parser("rcc", help="rclone command with crypt overlay") + rcc_cmd_parser.add_argument("cmd_args", nargs="*") + + args = parser.parse_args(argv) + + config = {} + if args.config: + config.update(get_config(args.config)) + if dict_config: + config.update(dict_config) + # assert config ? + rwm = RWM(config) + + if args.command == "aws": + return rwm.aws_cmd(args.cmd_args) + if args.command == "rc": + return rwm.rclone_cmd(args.cmd_args) + if args.command == "rcc": + return rwm.rclone_crypt_cmd(args.cmd_args) + + return 0 + + +if __name__ == "__main__": # pragma: nocover + sys.exit(main()) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000000000000000000000000000000000..45e63a7d3863a77f9d8a2f18e084ae0f6cb6fa34 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,44 @@ +"""pytest conftest""" + +import os +import shutil +import socket +from tempfile import mkdtemp + +import pytest +from xprocess import ProcessStarter + + +@pytest.fixture +def motoserver(xprocess): + """mocking s3 server fixture""" + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + + class Starter(ProcessStarter): + """xprocess stub""" + pattern = "This is a development server" + args = ["moto_server", "--port", str(port)] + terminate_on_interrupt = True + + 
xprocess.ensure("motoserver", Starter) + yield f"http://localhost:{port}" + xprocess.getinfo("motoserver").terminate() + + +@pytest.fixture +def tmpworkdir(): + """ + self cleaning temporary workdir + pytest tmpdir fixture has issues https://github.com/pytest-dev/pytest/issues/1120 + """ + + cwd = os.getcwd() + tmpdir = mkdtemp(prefix='rwm_test-') + os.chdir(tmpdir) + yield tmpdir + os.chdir(cwd) + shutil.rmtree(tmpdir) diff --git a/tests/test_default.py b/tests/test_default.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a0249898ab2ce0ba3a35793e3ea5dd9b1b5074 --- /dev/null +++ b/tests/test_default.py @@ -0,0 +1,117 @@ +"""default tests""" + +from pathlib import Path + +import boto3 +from rwm import is_sublist, main as rwm_main, rclone_obscure_password + + +def test_sublist(): + """test sublist""" + + assert is_sublist([], []) + assert is_sublist([1, 2, 3], [5, 4, 1, 2, 3, 6, 7]) + assert not is_sublist([1, 3], [5, 4, 1, 2, 3, 6, 7]) + + +def test_config(tmpworkdir: str): # pylint: disable=unused-argument + """test config handling""" + + Path("rwm.conf").touch() + rwm_main([]) + + +def buckets_plain_list(full_response): + """boto3 helper""" + + return [x["Name"] for x in full_response["Buckets"]] + + +def objects_plain_list(full_response): + """boto3 helper""" + + return [x["Key"] for x in full_response["Contents"]] + + +def test_aws(tmpworkdir: str, motoserver: str): # pylint: disable=unused-argument + """test aws command""" + + rwm_conf = { + "S3_ENDPOINT_URL": motoserver, + "S3_ACCESS_KEY": "dummy", + "S3_SECRET_KEY": "dummy", + } + s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy") + test_bucket = "testbucket" + + assert test_bucket not in buckets_plain_list(s3.list_buckets()) + + rwm_main(["aws", "s3", "mb", f"s3://{test_bucket}"], rwm_conf) + assert test_bucket in buckets_plain_list(s3.list_buckets()) + + rwm_main(["aws", "s3", "rb", f"s3://{test_bucket}"], rwm_conf) + assert 
test_bucket not in buckets_plain_list(s3.list_buckets()) + + +def test_rclone(tmpworkdir: str, motoserver: str): # pylint: disable=unused-argument + """test rclone command""" + + rwm_conf = { + "S3_ENDPOINT_URL": motoserver, + "S3_ACCESS_KEY": "dummy", + "S3_SECRET_KEY": "dummy", + } + s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy") + + test_bucket = "testbucket" + test_file = "testfile.txt" + Path(test_file).write_text('1234', encoding='utf-8') + + rwm_main(["rc", "mkdir", f"rwmbe:/{test_bucket}/"], rwm_conf) + rwm_main(["rc", "copy", test_file, f"rwmbe:/{test_bucket}/"], rwm_conf) + assert test_bucket in buckets_plain_list(s3.list_buckets()) + assert test_file in objects_plain_list(s3.list_objects_v2(Bucket=test_bucket)) + + +def test_rclone_argscheck(): + """test rclone args checking""" + + assert rwm_main(["rc", "dummy"]) == 1 + + +def test_rclone_crypt(tmpworkdir: str, motoserver: str): # pylint: disable=unused-argument + """test rclone with crypt overlay""" + + rwm_conf = { + "S3_ENDPOINT_URL": motoserver, + "S3_ACCESS_KEY": "dummy", + "S3_SECRET_KEY": "dummy", + "RCC_CRYPT_BUCKET": "cryptdata_test", + "RCC_CRYPT_PASSWORD": rclone_obscure_password("dummydummydummydummydummydummydummydummy"), + } + s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy") + + test_bucket = "testbucket" + test_file = "testfile.txt" + Path(test_file).write_text('1234', encoding='utf-8') + + rwm_main(["rcc", "copy", test_file, f"rwmbe:/{test_bucket}/"], rwm_conf) + assert len(objects_plain_list(s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"]))) == 1 + + rwm_main(["rcc", "delete", f"rwmbe:/{test_bucket}/{test_file}"], rwm_conf) + assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 0 + + test_file1 = "testfile1.txt" + Path(test_file1).write_text('4321', encoding='utf-8') + rwm_main(["rcc", "sync", ".", f"rwmbe:/{test_bucket}/"], rwm_conf) + 
assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 2 + + Path(test_file1).unlink() + rwm_main(["rcc", "sync", ".", f"rwmbe:/{test_bucket}/"], rwm_conf) + assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 1 + + +def test_rclone_crypt_argscheck(): + """test rclone crypt args checking""" + + assert rwm_main(["rcc", "dummy"]) == 1