From b21b4740df994f5f3d38ebe9d04a432c8a30ee2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Heuer?= Date: Fri, 15 Sep 2023 23:37:43 +0200 Subject: [PATCH] Added support for subdir as owner consumption --- docs/configuration.md | 13 +++ .../management/commands/document_consumer.py | 32 +++++- src/documents/tests/test_consumer.py | 11 +++ .../tests/test_management_consumer.py | 99 +++++++++++++++++++ src/paperless/settings.py | 4 + 5 files changed, 157 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 74486660f..26c264dbd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -899,6 +899,19 @@ don't exist yet. Defaults to false. +`PAPERLESS_CONSUMER_SUBDIR_AS_OWNER=` + +: Use the name of the first subdirectory as the owner of consumed files. E.g. +a file consumed from `/user1/file.pdf` will be owned by the user with username "user1". +Paperless will not create a user that doesn't exist yet; in that case the file will not have an owner. + + This is useful if you have different users in your system. Each user places + their files in their own folder. These folders won't be deleted. + + PAPERLESS_CONSUMER_RECURSIVE must be enabled for this to work. + + Defaults to false. 
def _owner_from_path(filepath) -> "int | None":
    """
    Resolve the owner of a consumed file from its first subdirectory.

    The first directory component of filepath below
    settings.CONSUMPTION_DIR is compared case-insensitively against the
    usernames of existing users. Users are never created here.

    Returns the primary key of the matching user, or None when the file
    lies directly in the consumption directory or no user matches.

    Raises ValueError (from Path.relative_to) if filepath is not located
    below settings.CONSUMPTION_DIR.
    """
    path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
    if not path_parts:
        # The file sits directly in the consumption directory, so there is
        # no subdirectory that could name an owner.
        return None

    # filter().first() rather than get(): an unknown folder name is an
    # expected situation and must yield None instead of raising
    # DoesNotExist. It also avoids MultipleObjectsReturned when usernames
    # differ only by case (first() then picks the lowest primary key).
    owner = User.objects.filter(username__iexact=path_parts[0]).first()
    if owner is None:
        return None
    return owner.pk
class TestConsumerOwner(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
    """
    Tests for the owner override passed to the consume task when
    CONSUMER_SUBDIR_AS_OWNER is enabled and files are dropped into
    subdirectories of the consumption directory.
    """

    @override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIR_AS_OWNER=True)
    def test_consume_file_with_path_owner(self):
        """
        A file placed in a subdirectory named after an existing user is
        queued with that user's id as owner_id.
        """
        owner_name = "User1"
        # Create a user prior to consuming a file using it in path
        owner_id = User.objects.create(username=owner_name).pk

        self.t_start()

        path = os.path.join(self.dirs.consumption_dir, owner_name)
        os.makedirs(path, exist_ok=True)
        f = Path(os.path.join(path, "my_file.pdf"))
        # Wait at least inotify read_delay for recursive watchers
        # to be created for the new directories
        sleep(1)
        shutil.copy(self.sample_file, f)

        self.wait_for_task_mock_call()

        self.consume_file_mock.assert_called_once()

        input_doc, overrides = self.get_last_consume_delay_call_args()

        self.assertEqual(input_doc.original_file, f)
        self.assertEqual(overrides.owner_id, owner_id)

    @override_settings(
        CONSUMER_POLLING=1,
        CONSUMER_POLLING_DELAY=3,
        CONSUMER_POLLING_RETRY_COUNT=20,
    )
    def test_consume_file_with_path_owner_polling(self):
        """
        Re-run the owner test with the polling settings overridden.
        """
        self.test_consume_file_with_path_owner()

    @override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIR_AS_OWNER=True)
    def test_consume_file_with_path_no_owner(self):
        """
        A file placed in a subdirectory that matches no user is still
        queued, with owner_id left as None.
        """
        self.t_start()

        # Create a random sub-folder that is not matching to a user
        path = os.path.join(self.dirs.consumption_dir, "random_folder")
        os.makedirs(path, exist_ok=True)
        f = Path(os.path.join(path, "my_file.pdf"))
        # Wait at least inotify read_delay for recursive watchers
        # to be created for the new directories
        sleep(1)
        shutil.copy(self.sample_file, f)

        self.wait_for_task_mock_call()

        self.consume_file_mock.assert_called_once()

        input_doc, overrides = self.get_last_consume_delay_call_args()

        self.assertEqual(input_doc.original_file, f)
        self.assertIsNone(overrides.owner_id)

    @override_settings(
        CONSUMER_RECURSIVE=True,
        CONSUMER_SUBDIR_AS_OWNER=True,
        CONSUMER_SUBDIRS_AS_TAGS=True,
    )
    def test_consume_file_with_path_owner_and_tags(self):
        """
        With both owner and tag subdirectory handling enabled, the first
        subdirectory yields the owner and the remaining subdirectories
        yield the tags.
        """
        owner_name = "User1"
        # Create a user prior to consuming a file using it in path
        owner_id = User.objects.create(username=owner_name).pk

        tag_names = ("existingTag", "Space Tag")
        # Create a Tag prior to consuming a file using it in path
        tag_ids = [
            Tag.objects.create(name="existingtag").pk,
        ]

        self.t_start()

        # Build <consumption_dir>/<owner_name>/<tag dirs...>: the first
        # folder names the owner, the nested folders name the tags
        path = os.path.join(self.dirs.consumption_dir, owner_name, *tag_names)

        os.makedirs(path, exist_ok=True)
        f = Path(os.path.join(path, "my_file.pdf"))
        # Wait at least inotify read_delay for recursive watchers
        # to be created for the new directories
        sleep(1)
        shutil.copy(self.sample_file, f)

        self.wait_for_task_mock_call()

        self.consume_file_mock.assert_called_once()

        input_doc, overrides = self.get_last_consume_delay_call_args()

        # Add the pk of the Tag created by _consume()
        tag_ids.append(Tag.objects.get(name=tag_names[1]).pk)

        self.assertEqual(input_doc.original_file, f)
        self.assertEqual(overrides.owner_id, owner_id)
        self.assertCountEqual(overrides.tag_ids, tag_ids)