diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py
index 5179db2e6..1be32dd80 100644
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -47,8 +47,12 @@ from documents.models import Workflow
 from documents.models import WorkflowAction
 from documents.models import WorkflowTrigger
 from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
+from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
+from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
+from documents.settings import EXPORTER_CRYPTO_SALT_NAME
+from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
 from documents.settings import EXPORTER_FILE_NAME
-from documents.settings import EXPORTER_SALT_NAME
 from documents.settings import EXPORTER_THUMBNAIL_NAME
 from documents.utils import copy_file_with_basic_stats
 from paperless import version
@@ -364,11 +368,19 @@ class Command(SecurityMixin, BaseCommand):
 
         # 4.2 write version information to target folder
         extra_metadata_path = (self.target / "metadata.json").resolve()
-        metadata = {"version": version.__full_version_str__}
+        metadata: dict[str, str | int | dict[str, str | int]] = {
+            "version": version.__full_version_str__,
+        }
 
-        # 4.2.1 If needed, write the salt value into the metadata
+        # 4.2.1 If needed, write the crypto values into the metadata
+        # Django stores most of these in the field itself, we store them once here
         if self.passphrase:
-            metadata[EXPORTER_SALT_NAME] = self.salt
+            metadata[EXPORTER_CRYPTO_SETTINGS_NAME] = {
+                EXPORTER_CRYPTO_ALGO_NAME: self.kdf_algorithm,
+                EXPORTER_CRYPTO_KEY_ITERATIONS_NAME: self.key_iterations,
+                EXPORTER_CRYPTO_SALT_NAME: self.salt,
+                EXPORTER_CRYPTO_KEY_SIZE_NAME: self.key_size,
+            }
         extra_metadata_path.write_text(
             json.dumps(
                 metadata,
@@ -544,11 +556,13 @@ class Command(SecurityMixin, BaseCommand):
             copy_file_with_basic_stats(source, target)
 
     def encrypt_secret_fields(self, manifest: dict) -> None:
-        """ """
+        """
+        Encrypts certain fields in the export. Currently limited to the mail account password
+        """
         if self.passphrase:
-            self.setup_crypto()
+            self.setup_crypto(passphrase=self.passphrase)
 
             for mail_account_record in manifest["mail_accounts"]:
-                mail_account_record["password"] = self.encrypt_field(
-                    mail_account_record["password"],
+                mail_account_record["password"] = self.encrypt_string(
+                    value=mail_account_record["password"],
                 )
diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py
index 1f9be7362..05cad41f7 100644
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -32,8 +32,12 @@ from documents.models import Note
 from documents.models import Tag
 from documents.parsers import run_convert
 from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
+from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
+from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
+from documents.settings import EXPORTER_CRYPTO_SALT_NAME
+from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
 from documents.settings import EXPORTER_FILE_NAME
-from documents.settings import EXPORTER_SALT_NAME
 from documents.settings import EXPORTER_THUMBNAIL_NAME
 from documents.signals.handlers import update_filename_and_move_files
 from documents.utils import copy_file_with_basic_stats
@@ -173,12 +177,24 @@ class Command(SecurityMixin, BaseCommand):
             with metadata_path.open() as infile:
                 data = json.load(infile)
                 self.version = data["version"]
-                if not self.passphrase and EXPORTER_SALT_NAME in data:
+                if not self.passphrase and EXPORTER_CRYPTO_SETTINGS_NAME in data:
                     raise CommandError(
                         "No passphrase was given, but this export contains encrypted fields",
                     )
-                elif EXPORTER_SALT_NAME in data:
-                    self.salt = data[EXPORTER_SALT_NAME]
+                elif EXPORTER_CRYPTO_SETTINGS_NAME in data:
+                    # Load every stored KDF parameter from its own key so decryption matches the export
+                    self.kdf_algorithm = data[EXPORTER_CRYPTO_SETTINGS_NAME][
+                        EXPORTER_CRYPTO_ALGO_NAME
+                    ]
+                    self.key_iterations = data[EXPORTER_CRYPTO_SETTINGS_NAME][
+                        EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
+                    ]
+                    self.key_size = data[EXPORTER_CRYPTO_SETTINGS_NAME][
+                        EXPORTER_CRYPTO_KEY_SIZE_NAME
+                    ]
+                    self.salt = data[EXPORTER_CRYPTO_SETTINGS_NAME][
+                        EXPORTER_CRYPTO_SALT_NAME
+                    ]
 
         if self.version and self.version != version.__full_version_str__:
             self.stdout.write(
@@ -323,20 +339,12 @@ class Command(SecurityMixin, BaseCommand):
                         f"Failed to read from archive file {doc_archive_path}",
                     ) from e
 
-        def check_acount_account_valid(mail_account_record: dict):
-            if EXPORTER_SALT_NAME in mail_account_record and not self.passphrase:
-                raise CommandError(
-                    "The manifest file contains encrypted mail account passwords, but no passphrase was provided",
-                )
-
         self.stdout.write("Checking the manifest")
         for record in self.manifest:
             # Only check if the document files exist if this is not data only
             # We don't care about documents for a data only import
             if not self.data_only and record["model"] == "documents.document":
                 check_document_validity(record)
-            elif record["model"] == "paperless_mail.mailaccount":
-                check_acount_account_valid(record)
 
     def _import_files_from_manifest(self):
         settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@@ -405,11 +413,13 @@ class Command(SecurityMixin, BaseCommand):
             document.save()
 
     def decrypt_secret_fields(self) -> None:
-        """ """
+        """
+        The converse decryption of some fields out of the export before importing to database
+        """
         if self.passphrase:
             # Salt has been loaded from metadata.json at this point, so it cannot be None
-            self.setup_crypto(self.salt)
+            self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
 
             for record in self.manifest:
                 if record["model"] == "paperless_mail.mailaccount":
-                    record["password"] = self.decrypt_field(record["password"])
+                    record["password"] = self.decrypt_string(value=record["password"])
diff --git a/src/documents/management/commands/mixins.py b/src/documents/management/commands/mixins.py
index 8b13ca330..b0ad5315e 100644
--- a/src/documents/management/commands/mixins.py
+++ b/src/documents/management/commands/mixins.py
@@ -1,7 +1,6 @@
 import base64
 import os
 from argparse import ArgumentParser
-from typing import Final
 from typing import Optional
 
 from cryptography.fernet import Fernet
@@ -51,41 +50,70 @@ class ProgressBarMixin:
 
 class SecurityMixin:
     """
+    Fully based on: https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet
+
+    To encrypt:
+      1. Call setup_crypto providing the user provided passphrase
+      2. Call encrypt_string with a value
+      3. Store the returned hexadecimal representation of the value
+
+    To decrypt:
+      1. Load the required parameters:
+        a. key iterations
+        b. key size
+        c. key algorithm
+      2. Call setup_crypto providing the user provided passphrase and stored salt
+      3. Call decrypt_string with a value
+      4. Use the returned value
+
     """
 
     # This matches to Django's default for now
     # https://github.com/django/django/blob/adae61942/django/contrib/auth/hashers.py#L315
-    KEY_ITERATIONS: Final[int] = 1_000_000
 
-    def setup_crypto(self, salt: Optional[str]):
-        self.salt = salt or os.urandom(16).hex()
-        self.fernet = self.get_fernet(self.passphrase, self.salt)
+    # Set the defaults to be used during export
+    # During import, these are overridden from the loaded values to ensure decryption is possible
+    key_iterations = 1_000_000
+    salt_size = 16
+    key_size = 32
+    kdf_algorithm = "pbkdf2_sha256"
 
-    def get_fernet(self, passphrase: str, salt: str) -> Fernet:
+    def setup_crypto(self, *, passphrase: str, salt: Optional[str] = None):
         """
         Constructs a class for encryption or decryption using the specified passphrase and salt
 
-        Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string
+        Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string.
+        If not provided, it will be derived from the system secure random
         """
 
+        self.salt = salt or os.urandom(self.salt_size).hex()
+
+        # Derive the KDF based on loaded settings
+        if self.kdf_algorithm == "pbkdf2_sha256":
+            kdf = PBKDF2HMAC(
+                algorithm=hashes.SHA256(),
+                length=self.key_size,
+                salt=bytes.fromhex(self.salt),
+                iterations=self.key_iterations,
+            )
+        else:
+            raise CommandError(
+                f"{self.kdf_algorithm} is an unknown key derivation function",
+            )
-        kdf = PBKDF2HMAC(
-            algorithm=hashes.SHA256(),
-            length=32,
-            salt=bytes.fromhex(salt),
-            iterations=480000,
-        )
 
         key = base64.urlsafe_b64encode(kdf.derive(passphrase.encode()))
-        return Fernet(key)
 
-    def encrypt_field(self, value: str) -> str:
+        self.fernet = Fernet(key)
+
+    def encrypt_string(self, *, value: str) -> str:
         """
-        Given a string field value, encrypts it and returns the hexadecimal representation of the encrypted token
+        Given a string value, encrypts it and returns the hexadecimal representation of the encrypted token
+
         """
         return self.fernet.encrypt(value.encode("utf-8")).hex()
 
-    def decrypt_field(self, value: str) -> str:
+    def decrypt_string(self, *, value: str) -> str:
         """
-        Given a string field value, decrypts it and returns the original value of the field
+        Given a string value, decrypts it and returns the original value of the field
         """
         return self.fernet.decrypt(bytes.fromhex(value)).decode("utf-8")
diff --git a/src/documents/settings.py b/src/documents/settings.py
index 9f695314f..5c2c74404 100644
--- a/src/documents/settings.py
+++ b/src/documents/settings.py
@@ -3,4 +3,10 @@
 EXPORTER_FILE_NAME = "__exported_file_name__"
 EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
 EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
-EXPORTER_SALT_NAME = "__salt_hex__"
+
+# Keys of the crypto settings written to metadata.json; each value must be unique
+EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
+EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
+EXPORTER_CRYPTO_KEY_ITERATIONS_NAME = "__key_iters__"
+EXPORTER_CRYPTO_KEY_SIZE_NAME = "__key_size__"
+EXPORTER_CRYPTO_ALGO_NAME = "__key_algo__"
diff --git a/src/documents/tests/test_management_exporter.py b/src/documents/tests/test_management_exporter.py
index fae8dc2ee..dca3bf90b 100644
--- a/src/documents/tests/test_management_exporter.py
+++ b/src/documents/tests/test_management_exporter.py
@@ -840,3 +840,6 @@ class TestExportImport(
         )
 
         self.assertEqual(Document.objects.all().count(), 4)
+
+    def test_export_passphrase(self):
+        pass