Refinements to the flow

This commit is contained in:
Trenton H 2024-06-05 12:48:42 -07:00
parent 19d6a2a256
commit 6d5812fe45
5 changed files with 100 additions and 42 deletions

View File

@ -47,8 +47,12 @@ from documents.models import Workflow
from documents.models import WorkflowAction
from documents.models import WorkflowTrigger
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
from documents.settings import EXPORTER_CRYPTO_SALT_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SALT_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.utils import copy_file_with_basic_stats
from paperless import version
@ -364,11 +368,19 @@ class Command(SecurityMixin, BaseCommand):
# 4.2 write version information to target folder
extra_metadata_path = (self.target / "metadata.json").resolve()
metadata = {"version": version.__full_version_str__}
metadata: dict[str, str | int | dict[str, str | int]] = {
"version": version.__full_version_str__,
}
# 4.2.1 If needed, write the salt value into the metadata
# 4.2.1 If needed, write the crypto values into the metadata
# Django stores most of these in the field itself, we store them once here
if self.passphrase:
metadata[EXPORTER_SALT_NAME] = self.salt
metadata[EXPORTER_CRYPTO_SETTINGS_NAME] = {
EXPORTER_CRYPTO_ALGO_NAME: self.kdf_algorithm,
EXPORTER_CRYPTO_KEY_ITERATIONS_NAME: self.key_iterations,
EXPORTER_CRYPTO_SALT_NAME: self.salt,
EXPORTER_CRYPTO_KEY_SIZE_NAME: self.key_size,
}
extra_metadata_path.write_text(
json.dumps(
metadata,
@ -544,11 +556,13 @@ class Command(SecurityMixin, BaseCommand):
copy_file_with_basic_stats(source, target)
def encrypt_secret_fields(self, manifest: dict) -> None:
""" """
"""
Encrypts certain fields in the export. Currently limited to the mail account password.

Does nothing when no passphrase was provided. Otherwise the crypto is set up
from the passphrase and each record under manifest["mail_accounts"] has its
"password" value replaced, in place, with the encrypted representation.
"""
if self.passphrase:
self.setup_crypto()
# No salt is passed here, so setup_crypto generates a fresh one for this export
self.setup_crypto(passphrase=self.passphrase)
for mail_account_record in manifest["mail_accounts"]:
mail_account_record["password"] = self.encrypt_field(
mail_account_record["password"],
mail_account_record["password"] = self.encrypt_string(
value=mail_account_record["password"],
)

View File

@ -32,8 +32,10 @@ from documents.models import Note
from documents.models import Tag
from documents.parsers import run_convert
from documents.settings import EXPORTER_ARCHIVE_NAME
from documents.settings import EXPORTER_CRYPTO_ALGO_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_ITERATIONS_NAME
from documents.settings import EXPORTER_CRYPTO_KEY_SIZE_NAME
from documents.settings import EXPORTER_CRYPTO_SALT_NAME
from documents.settings import EXPORTER_CRYPTO_SETTINGS_NAME
from documents.settings import EXPORTER_FILE_NAME
from documents.settings import EXPORTER_SALT_NAME
from documents.settings import EXPORTER_THUMBNAIL_NAME
from documents.signals.handlers import update_filename_and_move_files
from documents.utils import copy_file_with_basic_stats
@ -173,12 +175,24 @@ class Command(SecurityMixin, BaseCommand):
with metadata_path.open() as infile:
data = json.load(infile)
self.version = data["version"]
if not self.passphrase and EXPORTER_SALT_NAME in data:
if not self.passphrase and EXPORTER_CRYPTO_SETTINGS_NAME in data:
raise CommandError(
"No passphrase was given, but this export contains encrypted fields",
)
elif EXPORTER_SALT_NAME in data:
self.salt = data[EXPORTER_SALT_NAME]
elif EXPORTER_CRYPTO_SETTINGS_NAME in data:
# Load up the values for setting up decryption.
# Each setting must be read with its own key: the exporter writes the
# algorithm, iteration count, key size and salt as separate entries, and
# setup_crypto needs the exact same values to re-derive the key.
crypto_settings = data[EXPORTER_CRYPTO_SETTINGS_NAME]
self.kdf_algorithm = crypto_settings[EXPORTER_CRYPTO_ALGO_NAME]
self.key_iterations = crypto_settings[EXPORTER_CRYPTO_KEY_ITERATIONS_NAME]
self.key_size = crypto_settings[EXPORTER_CRYPTO_KEY_SIZE_NAME]
self.salt = crypto_settings[EXPORTER_CRYPTO_SALT_NAME]
if self.version and self.version != version.__full_version_str__:
self.stdout.write(
@ -323,20 +337,12 @@ class Command(SecurityMixin, BaseCommand):
f"Failed to read from archive file {doc_archive_path}",
) from e
def check_acount_account_valid(mail_account_record: dict):
if EXPORTER_SALT_NAME in mail_account_record and not self.passphrase:
raise CommandError(
"The manifest file contains encrypted mail account passwords, but no passphrase was provided",
)
self.stdout.write("Checking the manifest")
for record in self.manifest:
# Only check if the document files exist if this is not data only
# We don't care about documents for a data only import
if not self.data_only and record["model"] == "documents.document":
check_document_validity(record)
elif record["model"] == "paperless_mail.mailaccount":
check_acount_account_valid(record)
def _import_files_from_manifest(self):
settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True)
@ -405,11 +411,13 @@ class Command(SecurityMixin, BaseCommand):
document.save()
def decrypt_secret_fields(self) -> None:
""" """
"""
Decrypts the encrypted fields of the loaded manifest before the records are
imported into the database. Currently limited to the mail account password.

Does nothing when no passphrase was provided. Records in self.manifest are
updated in place.
"""
if self.passphrase:
# Salt has been loaded from metadata.json at this point, so it cannot be None
self.setup_crypto(self.salt)
self.setup_crypto(passphrase=self.passphrase, salt=self.salt)
for record in self.manifest:
# Only mail account records carry an encrypted field
if record["model"] == "paperless_mail.mailaccount":
record["password"] = self.decrypt_field(record["password"])
record["password"] = self.decrypt_string(value=record["password"])

View File

@ -1,7 +1,6 @@
import base64
import os
from argparse import ArgumentParser
from typing import Final
from typing import Optional
from cryptography.fernet import Fernet
@ -51,41 +50,70 @@ class ProgressBarMixin:
class SecurityMixin:
"""
Passphrase based encryption and decryption of string values.

Fully based on:
https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet

To encrypt:
1. Call setup_crypto with the user provided passphrase
(a random salt is generated when none is given)
2. Call encrypt_string with a value
3. Store the returned hexadecimal representation of the value
4. Store the salt and the key derivation settings alongside it,
as they are required again for decryption

To decrypt:
1. Load the required parameters onto the instance:
a. key iterations
b. key size
c. key algorithm
d. salt
2. Call setup_crypto with the user provided passphrase and the stored salt
3. Call decrypt_string with a value
4. Use the returned value
"""
# This matches Django's default for now
# https://github.com/django/django/blob/adae61942/django/contrib/auth/hashers.py#L315
KEY_ITERATIONS: Final[int] = 1_000_000
def setup_crypto(self, salt: Optional[str]):
self.salt = salt or os.urandom(16).hex()
self.fernet = self.get_fernet(self.passphrase, self.salt)
# Set the defaults to be used during export
# During import, these are overridden from the loaded values to ensure decryption is possible
key_iterations = 1_000_000
# Number of random bytes generated for a new salt (stored as hex)
salt_size = 16
# Length in bytes of the derived key
# NOTE(review): Fernet documents its key as 32 bytes - confirm a loaded key_size other than 32 fails cleanly
key_size = 32
kdf_algorithm = "pbkdf2_sha256"
def get_fernet(self, passphrase: str, salt: str) -> Fernet:
def setup_crypto(self, *, passphrase: str, salt: Optional[str] = None):
"""
Constructs a class for encryption or decryption using the specified passphrase and salt
Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string
Salt is assumed to be a hexadecimal representation of a cryptographically secure random byte string.
If not provided (None or empty), a new one is generated from os.urandom using salt_size bytes.

The salt used is kept on self.salt and the resulting Fernet instance on self.fernet.
Raises CommandError if kdf_algorithm is not a known key derivation function.
"""
self.salt = salt or os.urandom(self.salt_size).hex()
# Build the KDF from the configured (export) or loaded (import) settings
if self.kdf_algorithm == "pbkdf2_sha256":
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=self.key_size,
salt=bytes.fromhex(self.salt),
iterations=self.key_iterations,
)
else:
# NOTE(review): confirm CommandError is imported in this module
raise CommandError(
f"{self.kdf_algorithm} is an unknown key derivation function",
)
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=bytes.fromhex(salt),
iterations=480000,
)
# Fernet takes the derived key in URL-safe base64 form
key = base64.urlsafe_b64encode(kdf.derive(passphrase.encode()))
return Fernet(key)
def encrypt_field(self, value: str) -> str:
self.fernet = Fernet(key)
def encrypt_string(self, *, value: str) -> str:
"""
Given a string field value, encrypts it and returns the hexadecimal representation of the encrypted token
Given a string value, encrypts its UTF-8 bytes and returns the hexadecimal representation of the encrypted token.
setup_crypto must have been called first so that self.fernet exists.
"""
return self.fernet.encrypt(value.encode("utf-8")).hex()
def decrypt_field(self, value: str) -> str:
def decrypt_string(self, *, value: str) -> str:
"""
Given a string field value, decrypts it and returns the original value of the field
Given the hexadecimal representation of an encrypted token, decrypts it and returns the original string value.
setup_crypto must have been called first so that self.fernet exists.
"""
return self.fernet.decrypt(bytes.fromhex(value)).decode("utf-8")

View File

@ -3,4 +3,9 @@
# Keys added to exported records to point at the exported files
EXPORTER_FILE_NAME = "__exported_file_name__"
EXPORTER_THUMBNAIL_NAME = "__exported_thumbnail_name__"
EXPORTER_ARCHIVE_NAME = "__exported_archive_name__"
# Top-level salt key (same string as EXPORTER_CRYPTO_SALT_NAME below)
EXPORTER_SALT_NAME = "__salt_hex__"

# Keys used in metadata.json for the crypto settings of an encrypted export.
# The entries under EXPORTER_CRYPTO_SETTINGS_NAME share one dictionary, so
# every key below must be unique or one setting silently overwrites another.
EXPORTER_CRYPTO_SETTINGS_NAME = "__crypto__"
EXPORTER_CRYPTO_SALT_NAME = "__salt_hex__"
EXPORTER_CRYPTO_KEY_ITERATIONS_NAME = "__key_iters__"
EXPORTER_CRYPTO_KEY_SIZE_NAME = "__key_size__"
EXPORTER_CRYPTO_ALGO_NAME = "__key_algo__"

View File

@ -840,3 +840,6 @@ class TestExportImport(
)
self.assertEqual(Document.objects.all().count(), 4)
def test_export_passphrase(self):
# TODO: placeholder only - the body is a bare `pass`, so this test makes no
# assertions about exporting with a passphrase yet.
pass