Refinements to the flow

This commit is contained in:
Trenton H 2024-06-05 12:48:42 -07:00
parent 19d6a2a256
commit 6d5812fe45
5 changed files with 100 additions and 42 deletions

View File

@ -47,8 +47,12 @@ from documents.models import Workflow
from documents.models import WorkflowAction from documents.models import WorkflowAction
from documents.models import WorkflowTrigger from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
from documents.settings import EXPORTER_CRYPTO_SALT_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SALT_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
from paperless import version from paperless import version
@ -364,11 +368,19 @@ class Command(SecurityMixin, BaseCommand):
# 4.2 write version information to target folder # 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve() extra_metadata_path = (self.target / "metadata.json").resolve()
metadata = {"version": version.__full_version_str__} metadata: dict[str, str | int | dict[str, str | int]] = {
"version": version.__full_version_str__,
}
# 4.2.1 If needed, write the salt value into the metadata # 4.2.1 If needed, write the crypto values into the metadata
# Django stores most of these in the field itself, we store them once here
if self.passphrase: if self.passphrase:
metadata[EXPORTER_SALT_NAME] = self.salt metadata[EXPORTER_CRYPTO_SETTINGS_NAME] = {
EXPORTER_CRYPTO_ALGO_NAME: self.kdf_algorithm,
EXPORTER_CRYPTO_KEY_ITERATIONS_NAME: self.key_iterations,
EXPORTER_CRYPTO_SALT_NAME: self.salt,
EXPORTER_CRYPTO_KEY_SIZE_NAME: self.key_size,
}
extra_metadata_path.write_text( extra_metadata_path.write_text(
json.dumps( json.dumps(
metadata, metadata,
@ -544,11 +556,13 @@ class Command(SecurityMixin, BaseCommand):
copy_file_with_basic_stats(source, target) copy_file_with_basic_stats(source, target)
def encrypt_secret_fields(self, manifest: dict) -> None: def encrypt_secret_fields(self, manifest: dict) -> None:
""" """ """
Encrypts certain fields in the export. Currently limited to the mail account password
"""
if self.passphrase: if self.passphrase:
self.setup_crypto() self.setup_crypto(passphrase=self.passphrase)
for mail_account_record in manifest["mail_accounts"]: for mail_account_record in manifest["mail_accounts"]:
mail_account_record["password"] = self.encrypt_field( mail_account_record["password"] = self.encrypt_string(
mail_account_record["password"], value=mail_account_record["password"],
) )

View File

@ -32,8 +32,10 @@ from documents.models import Note
from documents.models import Tag from documents.models import Tag
from documents.parsers import run_convert from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
from documents.settings import EXPORTER_CRYPTO_SALT_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SALT_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import update_filename_and_move_files from documents.signals.handlers import update_filename_and_move_files
from documents.utils import copy_file_with_basic_stats from documents.utils import copy_file_with_basic_stats
@ -173,12 +175,24 @@ class Command(SecurityMixin, BaseCommand):
with metadata_path.open() as infile: with metadata_path.open() as infile:
data = json.load(infile) data = json.load(infile)
self.version = data["version"] self.version = data["version"]
if not self.passphrase and EXPORTER_SALT_NAME in data: if not self.passphrase and EXPORTER_CRYPTO_SETTINGS_NAME in data:
raise CommandError( raise CommandError(
"No passphrase was given, but this export contains encrypted fields", "No passphrase was given, but this export contains encrypted fields",
) )
elif EXPORTER_SALT_NAME in data: elif EXPORTER_CRYPTO_SETTINGS_NAME in data:
self.salt = data[EXPORTER_SALT_NAME] # Load up the values for setting up decryption
self.kdf_algorithm = data[EXPORTER_CRYPTO_SETTINGS_NAME][
EXPORTER_CRYPTO_ALGO_NAME
]
self.key_iterations = data[EXPORTER_CRYPTO_SETTINGS_NAME][
EXPORTER_CRYPTO_ALGO_NAME
]
self.key_size = data[EXPORTER_CRYPTO_SETTINGS_NAME][
EXPORTER_CRYPTO_ALGO_NAME
]
self.salt = data[EXPORTER_CRYPTO_SETTINGS_NAME][
EXPORTER_CRYPTO_SALT_NAME
]
if self.version and self.version != version.__full_version_str__: if self.version and self.version != version.__full_version_str__:
self.stdout.write( self.stdout.write(
@ -323,20 +337,12 @@ class Command(SecurityMixin, BaseCommand):
f"Failed to read from archive file {doc_archive_path}", f"Failed to read from archive file {doc_archive_path}",
) from e ) from e
def check_acount_account_valid(mail_account_record: dict):
if EXPORTER_SALT_NAME in mail_account_record and not self.passphrase:
raise CommandError(
"The manifest file contains encrypted mail account passwords, but no passphrase was provided",
)
self.stdout.write("Checking the manifest") self.stdout.write("Checking the manifest")
for record in self.manifest: for record in self.manifest:
# Only check if the document files exist if this is not data only # Only check if the document files exist if this is not data only
# We don't care about documents for a data only import # We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document": if not self.data_only and record["model"] == "documents.document":
check_document_validity(record) check_document_validity(record)
elif record["model"] == "paperless_mail.mailaccount":
check_acount_account_valid(record)
def _import_files_from_manifest(self): def _import_files_from_manifest(self):
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True) settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@ -405,11 +411,13 @@ class Command(SecurityMixin, BaseCommand):
document.save() document.save()
def decrypt_secret_fields(self) -> None: def decrypt_secret_fields(self) -> None:
""" """ """
The converse decryption of some fields out of the export before importing to database
"""
if self.passphrase: if self.passphrase:
# Salt has been loaded from metadata.json at this point, so it cannot be None # Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(self.salt) self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
for record in self.manifest: for record in self.manifest:
if record["model"] == "paperless_mail.mailaccount": if record["model"] == "paperless_mail.mailaccount":
record["password"] = self.decrypt_field(record["password"]) record["password"] = self.decrypt_string(value=record["password"])

View File

@ -1,7 +1,6 @@
import base64 import base64
import os import os
from argparse import ArgumentParser from argparse import ArgumentParser
from typing import Final
from typing import Optional from typing import Optional
from cryptography.fernet import Fernet from cryptography.fernet import Fernet
@ -51,41 +50,70 @@ class ProgressBarMixin:
class SecurityMixin: class SecurityMixin:
""" """
Fully based on:
https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet
To encrypt:
1. Call setup_crypto providing the user provided passphrase
2. Call encrypt_string with a value
3. Store the returned hexadecimal representation of the value
To decrypt:
1. Load the required parameters:
a. key iterations
b. key size
c. key algorithm
2. Call setup_crypto providing the user provided passphrase and stored salt
3. Call decrypt_string with a value
4. Use the returned value
""" """
# This matches to Django's default for now # This matches to Django's default for now
# https://github.com/django/django/blob/adae61942/django/contrib/auth/hashers.py#L315 # https://github.com/django/django/blob/adae61942/django/contrib/auth/hashers.py#L315
KEY_ITERATIONS: Final[int] = 1_000_000
def setup_crypto(self, salt: Optional[str]): # Set the defaults to be used during export
self.salt = salt or os.urandom(16).hex() # During import, these are overridden from the loaded values to ensure decryption is possible
self.fernet = self.get_fernet(self.passphrase, self.salt) key_iterations = 1_000_000
salt_size = 16
key_size = 32
kdf_algorithm = "pbkdf2_sha256"
def get_fernet(self, passphrase: str, salt: str) -> Fernet: def setup_crypto(self, *, passphrase: str, salt: Optional[str] = None):
""" """
Constructs a class for encryption or decryption using the specified passphrase and salt Constructs a class for encryption or decryption using the specified passphrase and salt
Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string.
If not provided, it will be derived from the system secure random
""" """
self.salt = salt or os.urandom(self.salt_size).hex()
# Derive the KDF based on loaded settings
if self.kdf_algorithm == "pbkdf2_sha256":
kdf = PBKDF2HMAC( kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(), algorithm=hashes.SHA256(),
length=32, length=self.key_size,
salt=bytes.fromhex(salt), salt=bytes.fromhex(self.salt),
iterations=480000, iterations=self.key_iterations,
)
else:
raise CommandError(
f"{self.kdf_algorithm} is an unknown key derivation function",
) )
key = base64.urlsafe_b64encode(kdf.derive(passphrase.encode()))
return Fernet(key)
def encrypt_field(self, value: str) -> str: key = base64.urlsafe_b64encode(kdf.derive(passphrase.encode()))
self.fernet = Fernet(key)
def encrypt_string(self, *, value: str) -> str:
""" """
Given a string field value, encrypts it and returns the hexadecimal representation of the encrypted token Given a string value, encrypts it and returns the hexadecimal representation of the encrypted token
""" """
return self.fernet.encrypt(value.encode("utf-8")).hex() return self.fernet.encrypt(value.encode("utf-8")).hex()
def decrypt_field(self, value: str) -> str: def decrypt_string(self, *, value: str) -> str:
""" """
Given a string field value, decrypts it and returns the original value of the field Given a string value, decrypts it and returns the original value of the field
""" """
return self.fernet.decrypt(bytes.fromhex(value)).decode("utf-8") return self.fernet.decrypt(bytes.fromhex(value)).decode("utf-8")

View File

@ -3,4 +3,9 @@
EXPORTER_FILE_NAME = "__exported_file_name__" EXPORTER_FILE_NAME = "__exported_file_name__"
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__" EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__" EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
EXPORTER_SALT_NAME = "__salt_hex__"
# Keys used in the export's metadata.json to record how secret fields were encrypted
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
EXPORTER_CRYPTO_KEY_ITERATIONS_NAME = "__key_iters__"
# Must be distinct from the iterations key: both live in the same settings dictionary,
# so a shared name would let one value silently overwrite the other
EXPORTER_CRYPTO_KEY_SIZE_NAME = "__key_size__"
EXPORTER_CRYPTO_ALGO_NAME = "__key_algo__"

View File

@ -840,3 +840,6 @@ class TestExportImport(
) )
self.assertEqual(Document.objects.all().count(), 4) self.assertEqual(Document.objects.all().count(), 4)
def test_export_passphrase(self):
pass