Added support for subdir as owner consumption

This commit is contained in:
André Heuer 2023-09-15 23:37:43 +02:00
parent 5ee9ad3e4f
commit b21b4740df
5 changed files with 157 additions and 2 deletions

View File

@ -899,6 +899,19 @@ don't exist yet.
Defaults to false. Defaults to false.
`PAPERLESS_CONSUMER_SUBDIR_AS_OWNER=<bool>`
: Use the name of the first subdirectory as the owner of consumed files. E.g.
`<CONSUMPTION_DIR>/user1/file.pdf` will make the user with username "user1" (matched case-insensitively) the owner of the consumed
file. Paperless will not create a user that doesn't exist yet; in that case the file will not have an owner.
This is useful if you have different users in your system and each user places
their files in their own folder. These folders won't be deleted.
PAPERLESS_CONSUMER_RECURSIVE must be enabled for this to work.
Defaults to false.
`PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>` `PAPERLESS_CONSUMER_IGNORE_PATTERNS=<json>`
: By default, paperless ignores certain files and folders in the : By default, paperless ignores certain files and folders in the

View File

@ -10,6 +10,7 @@ from time import sleep
from typing import Final from typing import Final
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
from django.core.management.base import CommandError from django.core.management.base import CommandError
from watchdog.events import FileSystemEventHandler from watchdog.events import FileSystemEventHandler
@ -40,7 +41,13 @@ def _tags_from_path(filepath) -> set[Tag]:
""" """
tag_ids = set() tag_ids = set()
path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
for part in path_parts: for index, part in enumerate(path_parts):
# If first subdir should be interpreted as owner
# this subdir should not be added as a tag
if index == 0 and settings.CONSUMER_SUBDIR_AS_OWNER:
owner_id = User.objects.get(username__iexact=part).pk
if owner_id:
continue
tag_ids.add( tag_ids.add(
Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk, Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
) )
@ -48,6 +55,20 @@ def _tags_from_path(filepath) -> set[Tag]:
return tag_ids return tag_ids
def _owner_from_path(filepath) -> "int | None":
    """
    Derive the owner of a consumed file from its location.

    The first folder below CONSUMPTION_DIR is compared case-insensitively
    against the usernames of the existing users.

    Returns the primary key of the matching user, or None when the file lies
    directly in CONSUMPTION_DIR or no user with that name exists.

    Raises ValueError if filepath is not located below CONSUMPTION_DIR and
    User.MultipleObjectsReturned if several usernames differ only by case.
    """
    path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
    if not path_parts:
        # No subfolder at all, so there is nothing to interpret as an owner
        return None
    try:
        return User.objects.get(username__iexact=path_parts[0]).pk
    except User.DoesNotExist:
        # Unknown users are never created on the fly; the document simply
        # ends up without an owner
        return None
def _is_ignored(filepath: str) -> bool: def _is_ignored(filepath: str) -> bool:
""" """
Checks if the given file should be ignored, based on configured Checks if the given file should be ignored, based on configured
@ -123,6 +144,13 @@ def _consume(filepath: str) -> None:
except Exception: except Exception:
logger.exception("Error creating tags from path") logger.exception("Error creating tags from path")
owner_id = None
try:
if settings.CONSUMER_SUBDIR_AS_OWNER:
owner_id = _owner_from_path(filepath)
except Exception:
logger.exception("Error setting owner from path")
try: try:
logger.info(f"Adding {filepath} to the task queue.") logger.info(f"Adding {filepath} to the task queue.")
consume_file.delay( consume_file.delay(
@ -130,7 +158,7 @@ def _consume(filepath: str) -> None:
source=DocumentSource.ConsumeFolder, source=DocumentSource.ConsumeFolder,
original_file=filepath, original_file=filepath,
), ),
DocumentMetadataOverrides(tag_ids=tag_ids), DocumentMetadataOverrides(tag_ids=tag_ids, owner_id=owner_id),
) )
except Exception: except Exception:
# Catch all so that the consumer won't crash. # Catch all so that the consumer won't crash.

View File

@ -11,6 +11,7 @@ from unittest.mock import MagicMock
from dateutil import tz from dateutil import tz
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from django.utils import timezone from django.utils import timezone
@ -445,6 +446,16 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertIn(t3, document.tags.all()) self.assertIn(t3, document.tags.all())
self._assert_first_last_send_progress() self._assert_first_last_send_progress()
def testOverrideOwner(self):
    """
    The owner passed as override must become the owner of the consumed
    document
    """
    expected_owner = User.objects.create(username="u1")

    consumed = self.consumer.try_consume_file(
        self.get_test_file(),
        override_owner_id=expected_owner.pk,
    )

    self.assertEqual(consumed.owner.pk, expected_owner.pk)
def testNotAFile(self): def testNotAFile(self):
self.assertRaisesMessage( self.assertRaisesMessage(
ConsumerError, ConsumerError,

View File

@ -7,6 +7,7 @@ from time import sleep
from unittest import mock from unittest import mock
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User
from django.core.management import CommandError from django.core.management import CommandError
from django.core.management import call_command from django.core.management import call_command
from django.test import TransactionTestCase from django.test import TransactionTestCase
@ -411,3 +412,101 @@ class TestConsumerTags(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCas
) )
def test_consume_file_with_path_tags_polling(self): def test_consume_file_with_path_tags_polling(self):
self.test_consume_file_with_path_tags() self.test_consume_file_with_path_tags()
class TestConsumerOwner(DirectoriesMixin, ConsumerThreadMixin, TransactionTestCase):
    """
    Consumer tests for deriving the document owner from the first sub-folder
    of the consumption directory
    """

    def _consume_sample_below(self, *subdirs):
        """
        Starts the consumer, drops the sample file into the nested folders
        named by subdirs and waits for the single resulting task.

        Returns the path of the dropped file together with the input document
        and the overrides handed to the consume task.
        """
        self.t_start()

        target_dir = os.path.join(self.dirs.consumption_dir, *subdirs)
        os.makedirs(target_dir, exist_ok=True)
        target_file = Path(target_dir) / "my_file.pdf"

        # Recursive inotify watchers for freshly created directories are only
        # in place once at least the read_delay has passed
        sleep(1)

        shutil.copy(self.sample_file, target_file)
        self.wait_for_task_mock_call()
        self.consume_file_mock.assert_called_once()

        input_doc, overrides = self.get_last_consume_delay_call_args()
        return target_file, input_doc, overrides

    @override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIR_AS_OWNER=True)
    def test_consume_file_with_path_owner(self):
        """
        A first sub-folder named like an existing user sets that user as owner
        """
        username = "User1"
        # The user has to exist before a file is consumed from its folder
        expected_owner_id = User.objects.create(username=username).pk

        target_file, input_doc, overrides = self._consume_sample_below(username)

        self.assertEqual(input_doc.original_file, target_file)
        self.assertEqual(overrides.owner_id, expected_owner_id)

    @override_settings(
        CONSUMER_POLLING=1,
        CONSUMER_POLLING_DELAY=3,
        CONSUMER_POLLING_RETRY_COUNT=20,
    )
    def test_consume_file_with_path_owner_polling(self):
        """
        Same scenario as the owner test, but with the polling consumer
        """
        self.test_consume_file_with_path_owner()

    @override_settings(CONSUMER_RECURSIVE=True, CONSUMER_SUBDIR_AS_OWNER=True)
    def test_consume_file_with_path_no_owner(self):
        """
        A first sub-folder that matches no user leaves the owner unset
        """
        target_file, input_doc, overrides = self._consume_sample_below(
            "random_folder",
        )

        self.assertEqual(input_doc.original_file, target_file)
        self.assertIsNone(overrides.owner_id)

    @override_settings(
        CONSUMER_RECURSIVE=True,
        CONSUMER_SUBDIR_AS_OWNER=True,
        CONSUMER_SUBDIRS_AS_TAGS=True,
    )
    def test_consume_file_with_path_owner_and_tags(self):
        """
        The first sub-folder becomes the owner, the folders below it become
        tags
        """
        username = "User1"
        # The user has to exist before a file is consumed from its folder
        expected_owner_id = User.objects.create(username=username).pk

        tag_names = ("existingTag", "Space Tag")
        # One tag exists up front (differing only in case from its folder)
        existing_tag_id = Tag.objects.create(name="existingtag").pk

        # Owner folder first, followed by one folder per tag
        target_file, input_doc, overrides = self._consume_sample_below(
            username,
            *tag_names,
        )

        # The second tag is expected to have been created by _consume()
        created_tag_id = Tag.objects.get(name=tag_names[1]).pk

        self.assertEqual(input_doc.original_file, target_file)
        self.assertEqual(overrides.owner_id, expected_owner_id)
        self.assertCountEqual(overrides.tag_ids, [existing_tag_id, created_tag_id])

View File

@ -819,6 +819,10 @@ CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT", "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
) )
# When enabled, the consumer interprets the first subdirectory below the
# consumption directory as the username of the owner of the consumed file
CONSUMER_SUBDIR_AS_OWNER: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_SUBDIR_AS_OWNER",
)
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0)) OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
# The default language that tesseract will attempt to use when parsing # The default language that tesseract will attempt to use when parsing