From 12b2f02590a89bf421dd92aff2aea884ada475b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radoslav=20Bod=C3=B3?= <bodik@cesnet.cz>
Date: Mon, 25 Mar 2024 13:42:14 +0100
Subject: [PATCH] rclone crypt simple copy, initial implementation

---
 .flake8               |   2 +
 .gitignore            |   6 ++
 .pylintrc             |   9 +++
 Makefile              |  29 ++++++++
 README.md             |  36 ++++++++++
 pytest.ini            |   0
 requirements.lock     |  80 ++++++++++++++++++++++
 requirements.txt      |  11 +++
 rwm.conf.example      |   6 ++
 rwm.py                | 154 ++++++++++++++++++++++++++++++++++++++++++
 tests/conftest.py     |  44 ++++++++++++
 tests/test_default.py | 117 ++++++++++++++++++++++++++++++++
 12 files changed, 494 insertions(+)
 create mode 100644 .flake8
 create mode 100644 .gitignore
 create mode 100644 .pylintrc
 create mode 100644 Makefile
 create mode 100644 pytest.ini
 create mode 100644 requirements.lock
 create mode 100644 requirements.txt
 create mode 100644 rwm.conf.example
 create mode 100755 rwm.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_default.py

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..6e8ce05
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 150
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e3b95ea
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+__pycache__/
+.coverage
+.vscode/
+rwm.conf
+testfile*
+venv/
\ No newline at end of file
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..cd01475
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,9 @@
+[FORMAT]
+max-line-length=150
+
+[TYPECHECK]
+ignored-classes=SQLAlchemy, sqlalchemy.orm.scoping.scoped_session
+
+[SIMILARITIES]
+min-similarity-lines=8
+ignore-imports=yes
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c4f70ca
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+all: lint
+
+install:
+	apt-get -y install awscli make rclone yamllint
+
+venv:
+	apt-get -y install python3-venv
+	python3 -m venv venv
+	venv/bin/pip install -U pip
+	venv/bin/pip install -r requirements.lock
+
+freeze:
+	@pip freeze | grep -v '^pkg[-_]resources='
+
+lint: lint-py lint-yaml
+
+lint-py:
+	python3 -m flake8 rwm.py tests
+	python3 -m pylint rwm.py tests
+
+lint-yaml:
+	yamllint --strict .
+
+test:
+	python3 -m pytest -v tests/
+
+coverage:
+	coverage run --source rwm -m pytest tests/ -x -vv
+	coverage report --show-missing --fail-under 100
diff --git a/README.md b/README.md
index d4245f9..8615f2e 100644
--- a/README.md
+++ b/README.md
@@ -44,3 +44,39 @@ RWM can:
 TBD:
 * unlike in other backup solutions, attacker with credentials can restore any old data from the repository/bucket
 * number of object files vs size
+
+
+## Install
+
+```
+git clone git@gitlab.flab.cesnet.cz:bodik/rwm.git /opt/rwm
+cd /opt/rwm
+make venv
+make install
+```
+
+
+## simple copy: rclone with crypt overlay
+
+* s3 + crypt overlay
+
+```
+cp rwm.conf.example rwm.conf
+edit rwm.conf
+rwm rcc sync /data rwmbe:/
+rwm rcc lsl rwmbe:/
+```
+
+### Notes
+
+* correct, fails to download corrupted files
+```
+root@bacula-test:/opt/rwm# ./rwm.py rcc copy rwmbe:/testfile.txt .
+2024/03/23 16:54:31 ERROR : testfile.txt: Failed to copy: failed to open source object: not an encrypted file - bad magic string
+2024/03/23 16:54:31 ERROR : Attempt 1/3 failed with 1 errors and: failed to open source object: not an encrypted file - bad magic string
+```
+
+* correct, skips bad filenames
+```
+2024/03/23 16:53:56 DEBUG : 6p78fe3tlp5o7ngi241jsjl2qX: Skipping undecryptable file name: illegal base32 data at input byte 25
+```
\ No newline at end of file
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.lock b/requirements.lock
new file mode 100644
index 0000000..54ee3a7
--- /dev/null
+++ b/requirements.lock
@@ -0,0 +1,80 @@
+annotated-types==0.6.0
+astroid==3.1.0
+attrs==23.2.0
+aws-sam-translator==1.86.0
+aws-xray-sdk==2.13.0
+blinker==1.7.0
+boto3==1.34.69
+botocore==1.34.69
+certifi==2024.2.2
+cffi==1.16.0
+cfn-lint==0.86.1
+charset-normalizer==3.3.2
+click==8.1.7
+coverage==7.4.4
+cryptography==42.0.5
+dill==0.3.8
+docker==7.0.0
+flake8==7.0.0
+Flask==3.0.2
+Flask-Cors==4.0.0
+graphql-core==3.2.3
+idna==3.6
+iniconfig==2.0.0
+isort==5.13.2
+itsdangerous==2.1.2
+Jinja2==3.1.3
+jmespath==1.0.1
+joserfc==0.9.0
+jschema-to-python==1.2.3
+jsondiff==2.0.0
+jsonpatch==1.33
+jsonpickle==3.0.3
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-path==0.3.2
+jsonschema-specifications==2023.12.1
+junit-xml==1.9
+lazy-object-proxy==1.10.0
+MarkupSafe==2.1.5
+mccabe==0.7.0
+moto==5.0.3
+mpmath==1.3.0
+networkx==3.2.1
+openapi-schema-validator==0.6.2
+openapi-spec-validator==0.7.1
+packaging==24.0
+pathable==0.4.3
+pbr==6.0.0
+platformdirs==4.2.0
+pluggy==1.4.0
+psutil==5.9.8
+py-partiql-parser==0.5.1
+pycodestyle==2.11.1
+pycparser==2.21
+pycryptodome==3.20.0
+pydantic==2.6.4
+pydantic_core==2.16.3
+pyflakes==3.2.0
+pylint==3.1.0
+pyparsing==3.1.2
+pytest==8.1.1
+pytest-xprocess==1.0.0
+python-dateutil==2.9.0.post0
+PyYAML==6.0.1
+referencing==0.31.1
+regex==2023.12.25
+requests==2.31.0
+responses==0.25.0
+rfc3339-validator==0.1.4
+rpds-py==0.18.0
+s3transfer==0.10.1
+sarif-om==1.0.4
+six==1.16.0
+sympy==1.12
+tomlkit==0.12.4
+typing_extensions==4.10.0
+urllib3==2.2.1
+Werkzeug==3.0.1
+wrapt==1.16.0
+xmltodict==0.13.0
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0020f1c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+# runtime
+pycryptodome
+pyyaml
+
+# dev
+flake8
+coverage
+moto[server]
+pytest
+pytest-xprocess
+pylint
diff --git a/rwm.conf.example b/rwm.conf.example
new file mode 100644
index 0000000..06b9654
--- /dev/null
+++ b/rwm.conf.example
@@ -0,0 +1,6 @@
+S3_ENDPOINT_URL: ""
+S3_ACCESS_KEY: ""
+S3_SECRET_KEY: ""
+
+RCC_CRYPT_BUCKET: "rwmcrypt"
+RCC_CRYPT_PASSWORD: ""
diff --git a/rwm.py b/rwm.py
new file mode 100755
index 0000000..5e7cf77
--- /dev/null
+++ b/rwm.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""rwm, restic/s3 worm manager"""
+
+import base64
+import logging
+import os
+import sys
+from argparse import ArgumentParser
+from pathlib import Path
+from subprocess import run as subrun
+
+import yaml
+from Crypto.Cipher import AES
+from Crypto.Random import get_random_bytes
+
+
+def is_sublist(needle, haystack):
+    """Return True when needle occurs in haystack as a contiguous run of equal items; an empty needle always matches"""
+
+    # An empty needle is treated as a sublist of any list, including an empty one
+    if not needle:
+        return True
+    return any(haystack[i:i+len(needle)] == needle for i in range(len(haystack)))
+
+
+def get_config(path):
+    """Load the YAML config file at path; return an empty dict if the file is missing or its content is falsy (e.g. empty)"""
+
+    if Path(path).exists():
+        return yaml.safe_load(Path(path).read_text(encoding='utf-8')) or {}
+    return {}
+
+
+def rclone_obscure_password(plaintext, iv=None):
+    """rclone obscure algorithm: AES-CTR with a fixed key; returns unpadded urlsafe base64 of iv + ciphertext (random iv if none given)"""
+
+    # https://github.com/rclone/rclone/blob/master/fs/config/obscure/obscure.go
+    # https://github.com/maaaaz/rclonedeobscure
+    secret_key = b"\x9c\x93\x5b\x48\x73\x0a\x55\x4d\x6b\xfd\x7c\x63\xc8\x86\xa9\x2b\xd3\x90\x19\x8e\xb8\x12\x8a\xfb\xf4\xde\x16\x2b\x8b\x95\xf6\x38"
+    if not iv:
+        iv = get_random_bytes(AES.block_size)
+    cipher = AES.new(key=secret_key, mode=AES.MODE_CTR, initial_value=iv, nonce=b'')
+    data = iv + cipher.encrypt(plaintext.encode())
+    return base64.urlsafe_b64encode(data).decode().rstrip("=")
+
+
+class RWM:
+    """Runs aws cli and rclone as subprocesses, configured from the config mapping passed to the constructor"""
+
+    def __init__(self, config):
+        self.config = config
+
+    def aws_cmd(self, args):
+        """Run `aws` against S3_ENDPOINT_URL with credentials from config passed via the environment; return the process exit code"""
+
+        env = {
+            "PATH": os.environ["PATH"],
+            "AWS_METADATA_SERVICE_NUM_ATTEMPTS": "0",
+            "AWS_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"],
+            "AWS_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"]
+        }
+        if is_sublist(["s3", "mb"], args):
+            # region must be set and empty for awscli >=2.x and ?du? ceph s3
+            env.update({"AWS_DEFAULT_REGION": ""})
+
+        # aws cli does not have endpoint-url as env config option
+        return subrun(["aws", "--endpoint-url", self.config["S3_ENDPOINT_URL"]] + args, env=env, check=False).returncode
+
+    def rclone_cmd(self, args):
+        """Run rclone with `rwmbe` defined as an s3 remote via env; return its exit code, or 1 if no arg starts with `rwmbe:` or is `help`"""
+
+        # ensure command uses expected backend
+        if (not any(x.startswith("rwmbe:") for x in args)) and all(x != "help" for x in args):
+            logging.error("rclone command missing 'rwmbe:' backend specification")
+            return 1
+
+        env = {  # NOTE(review): unlike aws_cmd, PATH is not passed on to the subprocess
+            "RCLONE_CONFIG": "",
+            "RCLONE_CONFIG_RWMBE_TYPE": "s3",
+            "RCLONE_CONFIG_RWMBE_ENDPOINT": self.config["S3_ENDPOINT_URL"],
+            "RCLONE_CONFIG_RWMBE_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"],
+            "RCLONE_CONFIG_RWMBE_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"],
+            "RCLONE_CONFIG_RWMBE_PROVIDER": "Ceph",
+            "RCLONE_CONFIG_RWMBE_ENV_AUTH": "false",
+            "RCLONE_CONFIG_RWMBE_REGION": "",
+        }
+        return subrun(["rclone"] + args, env=env, check=False).returncode
+
+    def rclone_crypt_cmd(self, args):
+        """
+        Run rclone with `rwmbe` set up as crypt over s3 bucket RCC_CRYPT_BUCKET; return exit code, or 1 if no arg starts with `rwmbe:`
+        * https://rclone.org/docs/#config-file
+        * https://rclone.org/crypt/
+        """
+
+        # ensure command uses expected backend
+        if not any(x.startswith("rwmbe:") for x in args):
+            logging.error("rclone command missing 'rwmbe:' backend specification")
+            return 1
+
+        env = {  # NOTE(review): unlike aws_cmd, PATH is not passed on to the subprocess
+            "RCLONE_CONFIG": "",
+            "RCLONE_CONFIG_RWMBE_TYPE": "crypt",
+            "RCLONE_CONFIG_RWMBE_REMOTE": f"rwms3be:/{self.config['RCC_CRYPT_BUCKET']}",
+            "RCLONE_CONFIG_RWMBE_PASSWORD": rclone_obscure_password(self.config["RCC_CRYPT_PASSWORD"]),
+            "RCLONE_CONFIG_RWMBE_PASSWORD2": rclone_obscure_password(self.config["RCC_CRYPT_PASSWORD"]),
+
+            "RCLONE_CONFIG_RWMS3BE_TYPE": "s3",
+            "RCLONE_CONFIG_RWMS3BE_ENDPOINT": self.config["S3_ENDPOINT_URL"],
+            "RCLONE_CONFIG_RWMS3BE_ACCESS_KEY_ID": self.config["S3_ACCESS_KEY"],
+            "RCLONE_CONFIG_RWMS3BE_SECRET_ACCESS_KEY": self.config["S3_SECRET_KEY"],
+            "RCLONE_CONFIG_RWMS3BE_PROVIDER": "Ceph",
+            "RCLONE_CONFIG_RWMS3BE_ENV_AUTH": "false",
+            "RCLONE_CONFIG_RWMS3BE_REGION": "",
+        }
+        return subrun(["rclone"] + args, env=env, check=False).returncode
+
+
+def main(argv=None, dict_config=None):
+    """Parse argv, load --config overlaid by dict_config and run the chosen command; return its exit code (0 if no command)"""
+
+    parser = ArgumentParser(description="restics3 worm manager")
+    parser.add_argument("--config", default="rwm.conf")
+
+    subparsers = parser.add_subparsers(title="commands", dest="command", required=False)
+    aws_cmd_parser = subparsers.add_parser("aws", help="aws command")
+    aws_cmd_parser.add_argument("cmd_args", nargs="*")
+    rc_cmd_parser = subparsers.add_parser("rc", help="rclone command")
+    rc_cmd_parser.add_argument("cmd_args", nargs="*")
+    rcc_cmd_parser = subparsers.add_parser("rcc", help="rclone command with crypt overlay")
+    rcc_cmd_parser.add_argument("cmd_args", nargs="*")
+
+    args = parser.parse_args(argv)
+
+    config = {}
+    if args.config:
+        config.update(get_config(args.config))
+    if dict_config:
+        config.update(dict_config)
+    # assert config ?
+    rwm = RWM(config)
+
+    if args.command == "aws":
+        return rwm.aws_cmd(args.cmd_args)
+    if args.command == "rc":
+        return rwm.rclone_cmd(args.cmd_args)
+    if args.command == "rcc":
+        return rwm.rclone_crypt_cmd(args.cmd_args)
+
+    return 0
+
+
+if __name__ == "__main__":  # pragma: nocover
+    sys.exit(main())
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..45e63a7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,44 @@
+"""pytest conftest"""
+
+import os
+import shutil
+import socket
+from tempfile import mkdtemp
+
+import pytest
+from xprocess import ProcessStarter
+
+
+@pytest.fixture
+def motoserver(xprocess):
+    """Start moto_server (mock S3) on a free local port via xprocess, yield its URL and terminate it on teardown"""
+
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.bind(("", 0))  # port 0: the OS assigns a free port, read back below
+    port = sock.getsockname()[1]
+    sock.close()
+
+    class Starter(ProcessStarter):
+        """xprocess stub"""
+        pattern = "This is a development server"
+        args = ["moto_server", "--port", str(port)]
+        terminate_on_interrupt = True
+
+    xprocess.ensure("motoserver", Starter)
+    yield f"http://localhost:{port}"
+    xprocess.getinfo("motoserver").terminate()
+
+
+@pytest.fixture
+def tmpworkdir():
+    """
+    chdir into a fresh temporary directory for the test, then restore the original cwd and remove the directory
+    pytest tmpdir fixture has issues https://github.com/pytest-dev/pytest/issues/1120
+    """
+
+    cwd = os.getcwd()
+    tmpdir = mkdtemp(prefix='rwm_test-')
+    os.chdir(tmpdir)
+    yield tmpdir
+    os.chdir(cwd)
+    shutil.rmtree(tmpdir)
diff --git a/tests/test_default.py b/tests/test_default.py
new file mode 100644
index 0000000..e0a0249
--- /dev/null
+++ b/tests/test_default.py
@@ -0,0 +1,117 @@
+"""default tests"""
+
+from pathlib import Path
+
+import boto3
+from rwm import is_sublist, main as rwm_main, rclone_obscure_password
+
+
+def test_sublist():
+    """is_sublist: empty needle matches, contiguous run matches, non-contiguous items do not"""
+
+    assert is_sublist([], [])
+    assert is_sublist([1, 2, 3], [5, 4, 1, 2, 3, 6, 7])
+    assert not is_sublist([1, 3], [5, 4, 1, 2, 3, 6, 7])
+
+
+def test_config(tmpworkdir: str):  # pylint: disable=unused-argument
+    """main() with an empty rwm.conf in the cwd and no command must run without raising"""
+
+    Path("rwm.conf").touch()
+    rwm_main([])
+
+
+def buckets_plain_list(full_response):
+    """Return the "Name" of every entry under "Buckets" in a boto3 list_buckets response"""
+
+    return [x["Name"] for x in full_response["Buckets"]]
+
+
+def objects_plain_list(full_response):
+    """Return the "Key" of every entry under "Contents" in a boto3 list_objects_v2 response (expects a "Contents" entry)"""
+
+    return [x["Key"] for x in full_response["Contents"]]
+
+
+def test_aws(tmpworkdir: str, motoserver: str):  # pylint: disable=unused-argument
+    """aws wrapper: `s3 mb` creates and `s3 rb` removes a bucket on the moto server"""
+
+    rwm_conf = {
+        "S3_ENDPOINT_URL": motoserver,
+        "S3_ACCESS_KEY": "dummy",
+        "S3_SECRET_KEY": "dummy",
+    }
+    s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy")
+    test_bucket = "testbucket"
+
+    assert test_bucket not in buckets_plain_list(s3.list_buckets())
+
+    rwm_main(["aws", "s3", "mb", f"s3://{test_bucket}"], rwm_conf)
+    assert test_bucket in buckets_plain_list(s3.list_buckets())
+
+    rwm_main(["aws", "s3", "rb", f"s3://{test_bucket}"], rwm_conf)
+    assert test_bucket not in buckets_plain_list(s3.list_buckets())
+
+
+def test_rclone(tmpworkdir: str, motoserver: str):  # pylint: disable=unused-argument
+    """rc wrapper: mkdir and copy leave the bucket and the uploaded file visible through boto3"""
+
+    rwm_conf = {
+        "S3_ENDPOINT_URL": motoserver,
+        "S3_ACCESS_KEY": "dummy",
+        "S3_SECRET_KEY": "dummy",
+    }
+    s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy")
+
+    test_bucket = "testbucket"
+    test_file = "testfile.txt"
+    Path(test_file).write_text('1234', encoding='utf-8')
+
+    rwm_main(["rc", "mkdir", f"rwmbe:/{test_bucket}/"], rwm_conf)
+    rwm_main(["rc", "copy", test_file, f"rwmbe:/{test_bucket}/"], rwm_conf)
+    assert test_bucket in buckets_plain_list(s3.list_buckets())
+    assert test_file in objects_plain_list(s3.list_objects_v2(Bucket=test_bucket))
+
+
+def test_rclone_argscheck():
+    """rc command without any `rwmbe:` argument (and without `help`) returns 1"""
+
+    assert rwm_main(["rc", "dummy"]) == 1
+
+
+def test_rclone_crypt(tmpworkdir: str, motoserver: str):  # pylint: disable=unused-argument
+    """rcc wrapper: copy, delete and sync leave the expected number of objects in the crypt bucket"""
+
+    rwm_conf = {
+        "S3_ENDPOINT_URL": motoserver,
+        "S3_ACCESS_KEY": "dummy",
+        "S3_SECRET_KEY": "dummy",
+        "RCC_CRYPT_BUCKET": "cryptdata_test",
+        "RCC_CRYPT_PASSWORD": rclone_obscure_password("dummydummydummydummydummydummydummydummy"),  # NOTE(review): obscured again inside rwm.py
+    }
+    s3 = boto3.client('s3', endpoint_url=motoserver, aws_access_key_id="dummy", aws_secret_access_key="dummy")
+
+    test_bucket = "testbucket"
+    test_file = "testfile.txt"
+    Path(test_file).write_text('1234', encoding='utf-8')
+
+    rwm_main(["rcc", "copy", test_file, f"rwmbe:/{test_bucket}/"], rwm_conf)
+    assert len(objects_plain_list(s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"]))) == 1
+
+    rwm_main(["rcc", "delete", f"rwmbe:/{test_bucket}/{test_file}"], rwm_conf)
+    assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 0
+
+    test_file1 = "testfile1.txt"
+    Path(test_file1).write_text('4321', encoding='utf-8')
+    rwm_main(["rcc", "sync", ".", f"rwmbe:/{test_bucket}/"], rwm_conf)
+    assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 2
+
+    Path(test_file1).unlink()
+    rwm_main(["rcc", "sync", ".", f"rwmbe:/{test_bucket}/"], rwm_conf)
+    assert s3.list_objects_v2(Bucket=rwm_conf["RCC_CRYPT_BUCKET"])["KeyCount"] == 1
+
+
+def test_rclone_crypt_argscheck():
+    """rcc command without any `rwmbe:` argument returns 1"""
+
+    assert rwm_main(["rcc", "dummy"]) == 1
-- 
GitLab