diff --git a/src/documents/management/commands/document_archiver.py b/src/documents/management/commands/document_archiver.py
index 3fa37a7f1..40714e866 100644
--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@@ -7,13 +7,15 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 
+from documents.management.commands.mixins import MultiProcessMixin
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
 from documents.tasks import update_document_archive_file
 
 logger = logging.getLogger("paperless.management.archiver")
 
 
-class Command(BaseCommand):
+class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
     help = (
         "Using the current classification model, assigns correspondents, tags "
         "and document types to all documents, effectively allowing you to "
@@ -43,20 +45,13 @@ class Command(BaseCommand):
                 "run on this specific document."
             ),
         )
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
-        parser.add_argument(
-            "--processes",
-            default=max(1, os.cpu_count() // 4),
-            type=int,
-            help="Number of processes to distribute work amongst",
-        )
+        self.add_argument_progress_bar_mixin(parser)
+        self.add_argument_processes_mixin(parser)
 
     def handle(self, *args, **options):
+        self.handle_processes_mixin(**options)
+        self.handle_progress_bar_mixin(**options)
+
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
 
         overwrite = options["overwrite"]
@@ -74,18 +69,18 @@ class Command(BaseCommand):
         )
 
         # Note to future self: this prevents django from reusing database
-        # conncetions between processes, which is bad and does not work
+        # connections between processes, which is bad and does not work
         # with postgres.
         db.connections.close_all()
 
         try:
             logging.getLogger().handlers[0].level = logging.ERROR
-            with multiprocessing.Pool(processes=options["processes"]) as pool:
+            with multiprocessing.Pool(processes=self.process_count) as pool:
                 list(
                     tqdm.tqdm(
                         pool.imap_unordered(update_document_archive_file, document_ids),
                         total=len(document_ids),
-                        disable=options["no_progress_bar"],
+                        disable=self.no_progress_bar,
                     ),
                 )
         except KeyboardInterrupt:
diff --git a/src/documents/management/commands/document_create_classifier.py b/src/documents/management/commands/document_create_classifier.py
index 5362f17c8..f5df51aac 100644
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -9,8 +9,5 @@ class Command(BaseCommand):
         "file. The document consumer will then automatically use this new model."
     )
 
-    def __init__(self, *args, **kwargs):
-        BaseCommand.__init__(self, *args, **kwargs)
-
     def handle(self, *args, **options):
         train_classifier()
diff --git a/src/documents/management/commands/document_fuzzy_match.py b/src/documents/management/commands/document_fuzzy_match.py
index 26ce55a39..63c640bee 100644
--- a/src/documents/management/commands/document_fuzzy_match.py
+++ b/src/documents/management/commands/document_fuzzy_match.py
@@ -7,6 +7,8 @@ import tqdm
 from django.core.management import BaseCommand
 from django.core.management import CommandError
 
+from documents.management.commands.mixins import MultiProcessMixin
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
 
 
@@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
     return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
 
 
-class Command(BaseCommand):
+class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
     help = "Searches for documents where the content almost matches"
 
     def add_arguments(self, parser):
@@ -51,23 +53,16 @@ class Command(BaseCommand):
             type=float,
             help="Ratio to consider documents a match",
         )
-        parser.add_argument(
-            "--processes",
-            default=4,
-            type=int,
-            help="Number of processes to distribute work amongst",
-        )
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+        self.add_argument_progress_bar_mixin(parser)
+        self.add_argument_processes_mixin(parser)
 
     def handle(self, *args, **options):
         RATIO_MIN: Final[float] = 0.0
         RATIO_MAX: Final[float] = 100.0
 
+        self.handle_processes_mixin(**options)
+        self.handle_progress_bar_mixin(**options)
+
         opt_ratio = options["ratio"]
         checked_pairs: set[tuple[int, int]] = set()
         work_pkgs: list[_WorkPackage] = []
@@ -76,9 +71,6 @@ class Command(BaseCommand):
         if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
             raise CommandError("The ratio must be between 0 and 100")
 
-        if options["processes"] < 1:
-            raise CommandError("There must be at least 1 process")
-
         all_docs = Document.objects.all().order_by("id")
 
         # Build work packages for processing
@@ -103,7 +95,7 @@ class Command(BaseCommand):
         # Don't spin up a pool of 1 process
-        if options["processes"] == 1:
+        if self.process_count == 1:
             results = []
-            for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
+            for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
                 results.append(_process_and_match(work))
         else:
-            with multiprocessing.Pool(processes=options["processes"]) as pool:
+            with multiprocessing.Pool(processes=self.process_count) as pool:
@@ -111,7 +103,7 @@ class Command(BaseCommand):
                     tqdm.tqdm(
                         pool.imap_unordered(_process_and_match, work_pkgs),
                         total=len(work_pkgs),
-                        disable=options["no_progress_bar"],
+                        disable=self.no_progress_bar,
                     ),
                 )
 
diff --git a/src/documents/management/commands/document_index.py b/src/documents/management/commands/document_index.py
index 279408b36..1fa4f5a70 100644
--- a/src/documents/management/commands/document_index.py
+++ b/src/documents/management/commands/document_index.py
@@ -1,25 +1,22 @@
 from django.core.management import BaseCommand
 from django.db import transaction
 
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.tasks import index_optimize
 from documents.tasks import index_reindex
 
 
-class Command(BaseCommand):
+class Command(ProgressBarMixin, BaseCommand):
     help = "Manages the document index."
 
     def add_arguments(self, parser):
         parser.add_argument("command", choices=["reindex", "optimize"])
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+        self.add_argument_progress_bar_mixin(parser)
 
     def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
         with transaction.atomic():
             if options["command"] == "reindex":
-                index_reindex(progress_bar_disable=options["no_progress_bar"])
+                index_reindex(progress_bar_disable=self.no_progress_bar)
             elif options["command"] == "optimize":
                 index_optimize()
diff --git a/src/documents/management/commands/document_renamer.py b/src/documents/management/commands/document_renamer.py
index acbfed8b2..25f8f2d21 100644
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -4,25 +4,22 @@ import tqdm
 from django.core.management.base import BaseCommand
 from django.db.models.signals import post_save
 
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
 
 
-class Command(BaseCommand):
+class Command(ProgressBarMixin, BaseCommand):
     help = "This will rename all documents to match the latest filename format."
 
     def add_arguments(self, parser):
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+        self.add_argument_progress_bar_mixin(parser)
 
     def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
         logging.getLogger().handlers[0].level = logging.ERROR
 
         for document in tqdm.tqdm(
             Document.objects.all(),
-            disable=options["no_progress_bar"],
+            disable=self.no_progress_bar,
         ):
             post_save.send(Document, instance=document)
diff --git a/src/documents/management/commands/document_retagger.py b/src/documents/management/commands/document_retagger.py
index 2599dd5f0..a7d2c7e12 100644
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -4,6 +4,7 @@ import tqdm
 from django.core.management.base import BaseCommand
 
 from documents.classifier import load_classifier
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
 from documents.signals.handlers import set_correspondent
 from documents.signals.handlers import set_document_type
@@ -13,7 +14,7 @@ from documents.signals.handlers import set_tags
 logger = logging.getLogger("paperless.management.retagger")
 
 
-class Command(BaseCommand):
+class Command(ProgressBarMixin, BaseCommand):
     help = (
         "Using the current classification model, assigns correspondents, tags "
         "and document types to all documents, effectively allowing you to "
@@ -48,12 +49,7 @@ class Command(BaseCommand):
                 "and tags that do not match anymore due to changed rules."
             ),
         )
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+        self.add_argument_progress_bar_mixin(parser)
         parser.add_argument(
             "--suggest",
             default=False,
@@ -72,6 +68,7 @@ class Command(BaseCommand):
         )
 
     def handle(self, *args, **options):
+        self.handle_progress_bar_mixin(**options)
         # Detect if we support color
         color = self.style.ERROR("test") != "test"
 
@@ -89,7 +86,7 @@ class Command(BaseCommand):
 
         classifier = load_classifier()
 
-        for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):
+        for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
             if options["correspondent"]:
                 set_correspondent(
                     sender=None,
diff --git a/src/documents/management/commands/document_sanity_checker.py b/src/documents/management/commands/document_sanity_checker.py
index 66c488fcc..095781a9d 100644
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -1,20 +1,17 @@
 from django.core.management.base import BaseCommand
 
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.sanity_checker import check_sanity
 
 
-class Command(BaseCommand):
+class Command(ProgressBarMixin, BaseCommand):
     help = "This command checks your document archive for issues."
 
     def add_arguments(self, parser):
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
+        self.add_argument_progress_bar_mixin(parser)
 
     def handle(self, *args, **options):
-        messages = check_sanity(progress=not options["no_progress_bar"])
+        self.handle_progress_bar_mixin(**options)
+        messages = check_sanity(progress=self.use_progress_bar)
 
         messages.log_messages()
diff --git a/src/documents/management/commands/document_thumbnails.py b/src/documents/management/commands/document_thumbnails.py
index c91bf5078..fdc3d319e 100644
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -1,12 +1,13 @@
 import logging
 import multiprocessing
-import os
 import shutil
 
 import tqdm
 from django import db
 from django.core.management.base import BaseCommand
 
+from documents.management.commands.mixins import MultiProcessMixin
+from documents.management.commands.mixins import ProgressBarMixin
 from documents.models import Document
 from documents.parsers import get_parser_class_for_mime_type
 
@@ -33,7 +34,7 @@ def _process_document(doc_id):
         parser.cleanup()
 
 
-class Command(BaseCommand):
+class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
     help = "This will regenerate the thumbnails for all documents."
 
     def add_arguments(self, parser):
@@ -48,22 +49,15 @@ class Command(BaseCommand):
                 "run on this specific document."
             ),
         )
-        parser.add_argument(
-            "--no-progress-bar",
-            default=False,
-            action="store_true",
-            help="If set, the progress bar will not be shown",
-        )
-        parser.add_argument(
-            "--processes",
-            default=max(1, os.cpu_count() // 4),
-            type=int,
-            help="Number of processes to distribute work amongst",
-        )
+        self.add_argument_progress_bar_mixin(parser)
+        self.add_argument_processes_mixin(parser)
 
     def handle(self, *args, **options):
         logging.getLogger().handlers[0].level = logging.ERROR
 
+        self.handle_processes_mixin(**options)
+        self.handle_progress_bar_mixin(**options)
+
         if options["document"]:
             documents = Document.objects.filter(pk=options["document"])
         else:
@@ -76,11 +70,11 @@ class Command(BaseCommand):
         # with postgres.
         db.connections.close_all()
 
-        with multiprocessing.Pool(processes=options["processes"]) as pool:
+        with multiprocessing.Pool(processes=self.process_count) as pool:
             list(
                 tqdm.tqdm(
                     pool.imap_unordered(_process_document, ids),
                     total=len(ids),
-                    disable=options["no_progress_bar"],
+                    disable=self.no_progress_bar,
                 ),
             )
diff --git a/src/documents/management/commands/mixins.py b/src/documents/management/commands/mixins.py
new file mode 100644
index 000000000..6532e7a3a
--- /dev/null
+++ b/src/documents/management/commands/mixins.py
@@ -0,0 +1,57 @@
+import os
+
+from django.core.management import CommandError
+
+
+class MultiProcessMixin:
+    """
+    Small class to handle adding an argument and validating it
+    for the use of multiple processes
+    """
+
+    def add_argument_processes_mixin(self, parser):
+        """
+        Adds the --processes option to the given argument parser
+        """
+        parser.add_argument(
+            "--processes",
+            # os.cpu_count() may return None if the count is undetermined
+            default=max(1, (os.cpu_count() or 1) // 4),
+            type=int,
+            help="Number of processes to distribute work amongst",
+        )
+
+    def handle_processes_mixin(self, *args, **options):
+        """
+        Stores the requested process count on the command as
+        self.process_count, raising CommandError if it is below 1
+        """
+        self.process_count = options["processes"]
+        if self.process_count < 1:
+            raise CommandError("There must be at least 1 process")
+
+
+class ProgressBarMixin:
+    """
+    Many commands use a progress bar, which can be disabled
+    via this class
+    """
+
+    def add_argument_progress_bar_mixin(self, parser):
+        """
+        Adds the --no-progress-bar flag to the given argument parser
+        """
+        parser.add_argument(
+            "--no-progress-bar",
+            default=False,
+            action="store_true",
+            help="If set, the progress bar will not be shown",
+        )
+
+    def handle_progress_bar_mixin(self, *args, **options):
+        """
+        Stores the flag on the command as self.no_progress_bar, along
+        with its inverse, self.use_progress_bar
+        """
+        self.no_progress_bar = options["no_progress_bar"]
+        self.use_progress_bar = not self.no_progress_bar