Cleans up some command help text and adds more control over process count for commands with a Pool

2023-10-28 20:36:24 -07:00
parent 577b49df9d
commit f65f2613e9
11 changed files with 113 additions and 106 deletions
--- a/src/documents/management/commands/decrypt_documents.py
+++ b/src/documents/management/commands/decrypt_documents.py
@@ -17,19 +17,27 @@ class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument(
            "--passphrase",
-            help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
-            "specify it here",
+            help=(
+                "If PAPERLESS_PASSPHRASE isn't set already, you need to "
+                "specify it here"
+            ),
        )

    def handle(self, *args, **options):
        try:
-            print(
-                "\n\nWARNING: This script is going to work directly on your "
-                "document originals, so\nWARNING: you probably shouldn't run "
-                "this unless you've got a recent backup\nWARNING: handy.  It "
-                "*should* work without a hitch, but be safe and backup your\n"
-                "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
-                "continue.\n\n",
+            self.stdout.write(
+                self.style.WARNING(
+                    "\n\n"
+                    "WARNING: This script is going to work directly on your "
+                    "document originals, so\n"
+                    "WARNING: you probably shouldn't run "
+                    "this unless you've got a recent backup\n"
+                    "WARNING: handy.  It "
+                    "*should* work without a hitch, but be safe and backup your\n"
+                    "WARNING: stuff first.\n\n"
+                    "Hit Ctrl+C to exit now, or Enter to "
+                    "continue.\n\n",
+                ),
            )
            _ = input()
        except KeyboardInterrupt:
@@ -44,14 +52,13 @@ class Command(BaseCommand):

        self.__gpg_to_unencrypted(passphrase)

-    @staticmethod
-    def __gpg_to_unencrypted(passphrase):
+    def __gpg_to_unencrypted(self, passphrase: str):
        encrypted_files = Document.objects.filter(
            storage_type=Document.STORAGE_TYPE_GPG,
        )

        for document in encrypted_files:
-            print(f"Decrypting {document}".encode())
+            self.stdout.write(f"Decrypting {document}")

            old_paths = [document.source_path, document.thumbnail_path]

--- a/src/documents/management/commands/document_archiver.py
+++ b/src/documents/management/commands/document_archiver.py
@@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.archiver")


 class Command(BaseCommand):
-    help = """
-        Using the current classification model, assigns correspondents, tags
-        and document types to all documents, effectively allowing you to
-        back-tag all previously indexed documents with metadata created (or
-        modified) after their initial import.
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Creates the archived version of your documents. By default only "
+        "documents without an archived version are processed; use --overwrite "
+        "to recreate existing archived versions, or -d to limit the run to a "
+        "single document."
+    )

    def add_arguments(self, parser):
@@ -30,8 +27,10 @@ class Command(BaseCommand):
            "--overwrite",
            default=False,
            action="store_true",
-            help="Recreates the archived document for documents that already "
-            "have an archived version.",
+            help=(
+                "Recreates the archived document for documents that already "
+                "have an archived version."
+            ),
        )
        parser.add_argument(
            "-d",
@@ -39,8 +38,10 @@ class Command(BaseCommand):
            default=None,
            type=int,
            required=False,
-            help="Specify the ID of a document, and this command will only "
-            "run on this specific document.",
+            help=(
+                "Specify the ID of a document, and this command will only "
+                "run on this specific document."
+            ),
        )
        parser.add_argument(
            "--no-progress-bar",
@@ -48,6 +49,12 @@ class Command(BaseCommand):
            action="store_true",
            help="If set, the progress bar will not be shown",
        )
+        parser.add_argument(
+            "--processes",
+            default=max(1, (os.cpu_count() or 1) // 4),  # cpu_count() may return None
+            type=int,
+            help="Number of processes to distribute work amongst",
+        )

    def handle(self, *args, **options):
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
@@ -73,7 +80,7 @@ class Command(BaseCommand):

        try:
            logging.getLogger().handlers[0].level = logging.ERROR
-            with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
+            with multiprocessing.Pool(processes=options["processes"]) as pool:
                list(
                    tqdm.tqdm(
                        pool.imap_unordered(update_document_archive_file, document_ids),
--- a/src/documents/management/commands/document_create_classifier.py
+++ b/src/documents/management/commands/document_create_classifier.py
@@ -4,12 +4,9 @@ from documents.tasks import train_classifier


 class Command(BaseCommand):
-    help = """
-        Trains the classifier on your data and saves the resulting models to a
-        file. The document consumer will then automatically use this new model.
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Trains the classifier on your data and saves the resulting models to a "
+        "file. The document consumer will then automatically use this new model."
    )

    def __init__(self, *args, **kwargs):
--- a/src/documents/management/commands/document_exporter.py
+++ b/src/documents/management/commands/document_exporter.py
@@ -43,13 +43,10 @@ from paperless_mail.models import MailRule


 class Command(BaseCommand):
-    help = """
-        Decrypt and rename all files in our collection into a given target
-        directory.  And include a manifest file containing document data for
-        easy import.
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Decrypt and rename all files in our collection into a given target "
+        "directory.  And include a manifest file containing document data for "
+        "easy import."
    )

    def add_arguments(self, parser):
@@ -60,9 +57,11 @@ class Command(BaseCommand):
            "--compare-checksums",
            default=False,
            action="store_true",
-            help="Compare file checksums when determining whether to export "
-            "a file or not. If not specified, file size and time "
-            "modified is used instead.",
+            help=(
+                "Compare file checksums when determining whether to export "
+                "a file or not. If not specified, file size and time "
+                "modified is used instead."
+            ),
        )

        parser.add_argument(
@@ -70,9 +69,11 @@ class Command(BaseCommand):
            "--delete",
            default=False,
            action="store_true",
-            help="After exporting, delete files in the export directory that "
-            "do not belong to the current export, such as files from "
-            "deleted documents.",
+            help=(
+                "After exporting, delete files in the export directory that "
+                "do not belong to the current export, such as files from "
+                "deleted documents."
+            ),
        )

        parser.add_argument(
@@ -80,8 +81,10 @@ class Command(BaseCommand):
            "--use-filename-format",
            default=False,
            action="store_true",
-            help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
-            "export directory, if configured.",
+            help=(
+                "Use PAPERLESS_FILENAME_FORMAT for storing files in the "
+                "export directory, if configured."
+            ),
        )

        parser.add_argument(
@@ -105,8 +108,10 @@ class Command(BaseCommand):
            "--use-folder-prefix",
            default=False,
            action="store_true",
-            help="Export files in dedicated folders according to their nature: "
-            "archive, originals or thumbnails",
+            help=(
+                "Export files in dedicated folders according to their nature: "
+                "archive, originals or thumbnails"
+            ),
        )

        parser.add_argument(
--- a/src/documents/management/commands/document_importer.py
+++ b/src/documents/management/commands/document_importer.py
@@ -40,12 +40,9 @@ def disable_signal(sig, receiver, sender):


 class Command(BaseCommand):
-    help = """
-        Using a manifest.json file, load the data from there, and import the
-        documents it refers to.
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Using a manifest.json file, load the data from there, and import the "
+        "documents it refers to."
    )

    def add_arguments(self, parser):
--- a/src/documents/management/commands/document_renamer.py
+++ b/src/documents/management/commands/document_renamer.py
@@ -8,12 +8,7 @@ from documents.models import Document


 class Command(BaseCommand):
-    help = """
-        This will rename all documents to match the latest filename format.
-    """.replace(
-        "    ",
-        "",
-    )
+    help = "This will rename all documents to match the latest filename format."

    def add_arguments(self, parser):
        parser.add_argument(
--- a/src/documents/management/commands/document_retagger.py
+++ b/src/documents/management/commands/document_retagger.py
@@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.retagger")


 class Command(BaseCommand):
-    help = """
-        Using the current classification model, assigns correspondents, tags
-        and document types to all documents, effectively allowing you to
-        back-tag all previously indexed documents with metadata created (or
-        modified) after their initial import.
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Using the current classification model, assigns correspondents, tags "
+        "and document types to all documents, effectively allowing you to "
+        "back-tag all previously indexed documents with metadata created (or "
+        "modified) after their initial import."
    )

    def add_arguments(self, parser):
@@ -34,18 +31,22 @@ class Command(BaseCommand):
            "--use-first",
            default=False,
            action="store_true",
-            help="By default this command won't try to assign a correspondent "
-            "if more than one matches the document.  Use this flag if "
-            "you'd rather it just pick the first one it finds.",
+            help=(
+                "By default this command won't try to assign a correspondent "
+                "if more than one matches the document.  Use this flag if "
+                "you'd rather it just pick the first one it finds."
+            ),
        )
        parser.add_argument(
            "-f",
            "--overwrite",
            default=False,
            action="store_true",
-            help="If set, the document retagger will overwrite any previously"
-            "set correspondent, document and remove correspondents, types"
-            "and tags that do not match anymore due to changed rules.",
+            help=(
+                "If set, the document retagger will overwrite any previously "
+                "set correspondent, document and remove correspondents, types "
+                "and tags that do not match anymore due to changed rules."
+            ),
        )
        parser.add_argument(
            "--no-progress-bar",
--- a/src/documents/management/commands/document_sanity_checker.py
+++ b/src/documents/management/commands/document_sanity_checker.py
@@ -4,12 +4,7 @@ from documents.sanity_checker import check_sanity


 class Command(BaseCommand):
-    help = """
-        This command checks your document archive for issues.
-    """.replace(
-        "    ",
-        "",
-    )
+    help = "This command checks your document archive for issues."

    def add_arguments(self, parser):
        parser.add_argument(
--- a/src/documents/management/commands/document_thumbnails.py
+++ b/src/documents/management/commands/document_thumbnails.py
@@ -1,5 +1,6 @@
 import logging
 import multiprocessing
+import os
 import shutil

 import tqdm
@@ -33,12 +34,7 @@ def _process_document(doc_id):


 class Command(BaseCommand):
-    help = """
-        This will regenerate the thumbnails for all documents.
-    """.replace(
-        "    ",
-        "",
-    )
+    help = "This will regenerate the thumbnails for all documents."

    def add_arguments(self, parser):
        parser.add_argument(
@@ -47,8 +43,10 @@ class Command(BaseCommand):
            default=None,
            type=int,
            required=False,
-            help="Specify the ID of a document, and this command will only "
-            "run on this specific document.",
+            help=(
+                "Specify the ID of a document, and this command will only "
+                "run on this specific document."
+            ),
        )
        parser.add_argument(
            "--no-progress-bar",
@@ -56,6 +54,12 @@ class Command(BaseCommand):
            action="store_true",
            help="If set, the progress bar will not be shown",
        )
+        parser.add_argument(
+            "--processes",
+            default=max(1, (os.cpu_count() or 1) // 4),  # cpu_count() may return None
+            type=int,
+            help="Number of processes to distribute work amongst",
+        )

    def handle(self, *args, **options):
        logging.getLogger().handlers[0].level = logging.ERROR
@@ -72,7 +76,7 @@ class Command(BaseCommand):
        # with postgres.
        db.connections.close_all()

-        with multiprocessing.Pool() as pool:
+        with multiprocessing.Pool(processes=options["processes"]) as pool:
            list(
                tqdm.tqdm(
                    pool.imap_unordered(_process_document, ids),
--- a/src/documents/management/commands/manage_superuser.py
+++ b/src/documents/management/commands/manage_superuser.py
@@ -1,5 +1,6 @@
 import logging
 import os
+from argparse import RawTextHelpFormatter

 from django.contrib.auth.models import User
 from django.core.management.base import BaseCommand
@@ -8,20 +9,22 @@ logger = logging.getLogger("paperless.management.superuser")


 class Command(BaseCommand):
-    help = """
-        Creates a Django superuser:
-        User named: admin
-        Email: root@localhost
-        with password based on env variable.
-        No superuser will be created, when:
-        - The username is taken already exists
-        - A superuser already exists
-        - PAPERLESS_ADMIN_PASSWORD is not set
-    """.replace(
-        "    ",
-        "",
+    help = (
+        "Creates a Django superuser:\n"
+        "  User named: admin\n"
+        "  Email: root@localhost\n"
+        "  Password: based on env variable PAPERLESS_ADMIN_PASSWORD\n"
+        "No superuser will be created, when:\n"
+        "  - The username already exists\n"
+        "  - A superuser already exists\n"
+        "  - PAPERLESS_ADMIN_PASSWORD is not set"
+    )

+    def create_parser(self, *args, **kwargs):
+        parser = super().create_parser(*args, **kwargs)
+        parser.formatter_class = RawTextHelpFormatter
+        return parser
+
    def handle(self, *args, **options):
        username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
        mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
--- a/src/paperless_mail/management/commands/mail_fetcher.py
+++ b/src/paperless_mail/management/commands/mail_fetcher.py
@@ -4,11 +4,7 @@ from paperless_mail import tasks


 class Command(BaseCommand):
-    help = """
-    """.replace(
-        "    ",
-        "",
-    )
+    help = "Manually triggers a fetching and processing of all mail accounts"

    def handle(self, *args, **options):
        tasks.process_mail_accounts()