Cleans up some command help text and adds more control over the process count for commands that use a Pool

This commit is contained in:
Trenton Holmes 2023-10-28 20:36:24 -07:00 committed by Trenton H
parent 577b49df9d
commit f65f2613e9
11 changed files with 113 additions and 106 deletions

View File

@ -17,19 +17,27 @@ class Command(BaseCommand):
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(
"--passphrase", "--passphrase",
help="If PAPERLESS_PASSPHRASE isn't set already, you need to " help=(
"specify it here", "If PAPERLESS_PASSPHRASE isn't set already, you need to "
"specify it here"
),
) )
def handle(self, *args, **options): def handle(self, *args, **options):
try: try:
print( self.stdout.write(
"\n\nWARNING: This script is going to work directly on your " self.style.WARNING(
"document originals, so\nWARNING: you probably shouldn't run " "\n\n"
"this unless you've got a recent backup\nWARNING: handy. It " "WARNING: This script is going to work directly on your "
"*should* work without a hitch, but be safe and backup your\n" "document originals, so\n"
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to " "WARNING: you probably shouldn't run "
"continue.\n\n", "this unless you've got a recent backup\n"
"WARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\n"
"Hit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
),
) )
_ = input() _ = input()
except KeyboardInterrupt: except KeyboardInterrupt:
@ -44,14 +52,13 @@ class Command(BaseCommand):
self.__gpg_to_unencrypted(passphrase) self.__gpg_to_unencrypted(passphrase)
@staticmethod def __gpg_to_unencrypted(self, passphrase: str):
def __gpg_to_unencrypted(passphrase):
encrypted_files = Document.objects.filter( encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG, storage_type=Document.STORAGE_TYPE_GPG,
) )
for document in encrypted_files: for document in encrypted_files:
print(f"Decrypting {document}".encode()) self.stdout.write(f"Decrypting {document}")
old_paths = [document.source_path, document.thumbnail_path] old_paths = [document.source_path, document.thumbnail_path]

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.archiver")
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Using the current classification model, assigns correspondents, tags "Using the current classification model, assigns correspondents, tags "
and document types to all documents, effectively allowing you to "and document types to all documents, effectively allowing you to "
back-tag all previously indexed documents with metadata created (or "back-tag all previously indexed documents with metadata created (or "
modified) after their initial import. "modified) after their initial import."
""".replace(
" ",
"",
) )
def add_arguments(self, parser): def add_arguments(self, parser):
@ -30,8 +27,10 @@ class Command(BaseCommand):
"--overwrite", "--overwrite",
default=False, default=False,
action="store_true", action="store_true",
help="Recreates the archived document for documents that already " help=(
"have an archived version.", "Recreates the archived document for documents that already "
"have an archived version."
),
) )
parser.add_argument( parser.add_argument(
"-d", "-d",
@ -39,8 +38,10 @@ class Command(BaseCommand):
default=None, default=None,
type=int, type=int,
required=False, required=False,
help="Specify the ID of a document, and this command will only " help=(
"run on this specific document.", "Specify the ID of a document, and this command will only "
"run on this specific document."
),
) )
parser.add_argument( parser.add_argument(
"--no-progress-bar", "--no-progress-bar",
@ -48,6 +49,12 @@ class Command(BaseCommand):
action="store_true", action="store_true",
help="If set, the progress bar will not be shown", help="If set, the progress bar will not be shown",
) )
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options): def handle(self, *args, **options):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True) os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
@ -73,7 +80,7 @@ class Command(BaseCommand):
try: try:
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool: with multiprocessing.Pool(processes=options["processes"]) as pool:
list( list(
tqdm.tqdm( tqdm.tqdm(
pool.imap_unordered(update_document_archive_file, document_ids), pool.imap_unordered(update_document_archive_file, document_ids),

View File

@ -4,12 +4,9 @@ from documents.tasks import train_classifier
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Trains the classifier on your data and saves the resulting models to a "Trains the classifier on your data and saves the resulting models to a "
file. The document consumer will then automatically use this new model. "file. The document consumer will then automatically use this new model."
""".replace(
" ",
"",
) )
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):

View File

@ -43,13 +43,10 @@ from paperless_mail.models import MailRule
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Decrypt and rename all files in our collection into a given target "Decrypt and rename all files in our collection into a given target "
directory. And include a manifest file containing document data for "directory. And include a manifest file containing document data for "
easy import. "easy import."
""".replace(
" ",
"",
) )
def add_arguments(self, parser): def add_arguments(self, parser):
@ -60,9 +57,11 @@ class Command(BaseCommand):
"--compare-checksums", "--compare-checksums",
default=False, default=False,
action="store_true", action="store_true",
help="Compare file checksums when determining whether to export " help=(
"a file or not. If not specified, file size and time " "Compare file checksums when determining whether to export "
"modified is used instead.", "a file or not. If not specified, file size and time "
"modified is used instead."
),
) )
parser.add_argument( parser.add_argument(
@ -70,9 +69,11 @@ class Command(BaseCommand):
"--delete", "--delete",
default=False, default=False,
action="store_true", action="store_true",
help="After exporting, delete files in the export directory that " help=(
"do not belong to the current export, such as files from " "After exporting, delete files in the export directory that "
"deleted documents.", "do not belong to the current export, such as files from "
"deleted documents."
),
) )
parser.add_argument( parser.add_argument(
@ -80,8 +81,10 @@ class Command(BaseCommand):
"--use-filename-format", "--use-filename-format",
default=False, default=False,
action="store_true", action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the " help=(
"export directory, if configured.", "Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
),
) )
parser.add_argument( parser.add_argument(
@ -105,8 +108,10 @@ class Command(BaseCommand):
"--use-folder-prefix", "--use-folder-prefix",
default=False, default=False,
action="store_true", action="store_true",
help="Export files in dedicated folders according to their nature: " help=(
"archive, originals or thumbnails", "Export files in dedicated folders according to their nature: "
"archive, originals or thumbnails"
),
) )
parser.add_argument( parser.add_argument(

View File

@ -40,12 +40,9 @@ def disable_signal(sig, receiver, sender):
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Using a manifest.json file, load the data from there, and import the "Using a manifest.json file, load the data from there, and import the "
documents it refers to. "documents it refers to."
""".replace(
" ",
"",
) )
def add_arguments(self, parser): def add_arguments(self, parser):

View File

@ -8,12 +8,7 @@ from documents.models import Document
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = "This will rename all documents to match the latest filename format."
This will rename all documents to match the latest filename format.
""".replace(
" ",
"",
)
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Using the current classification model, assigns correspondents, tags "Using the current classification model, assigns correspondents, tags "
and document types to all documents, effectively allowing you to "and document types to all documents, effectively allowing you to "
back-tag all previously indexed documents with metadata created (or "back-tag all previously indexed documents with metadata created (or "
modified) after their initial import. "modified) after their initial import."
""".replace(
" ",
"",
) )
def add_arguments(self, parser): def add_arguments(self, parser):
@ -34,18 +31,22 @@ class Command(BaseCommand):
"--use-first", "--use-first",
default=False, default=False,
action="store_true", action="store_true",
help="By default this command won't try to assign a correspondent " help=(
"if more than one matches the document. Use this flag if " "By default this command won't try to assign a correspondent "
"you'd rather it just pick the first one it finds.", "if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds."
),
) )
parser.add_argument( parser.add_argument(
"-f", "-f",
"--overwrite", "--overwrite",
default=False, default=False,
action="store_true", action="store_true",
help="If set, the document retagger will overwrite any previously" help=(
"set correspondent, document and remove correspondents, types" "If set, the document retagger will overwrite any previously"
"and tags that do not match anymore due to changed rules.", "set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules."
),
) )
parser.add_argument( parser.add_argument(
"--no-progress-bar", "--no-progress-bar",

View File

@ -4,12 +4,7 @@ from documents.sanity_checker import check_sanity
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = "This command checks your document archive for issues."
This command checks your document archive for issues.
""".replace(
" ",
"",
)
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(

View File

@ -1,5 +1,6 @@
import logging import logging
import multiprocessing import multiprocessing
import os
import shutil import shutil
import tqdm import tqdm
@ -33,12 +34,7 @@ def _process_document(doc_id):
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = "This will regenerate the thumbnails for all documents."
This will regenerate the thumbnails for all documents.
""".replace(
" ",
"",
)
def add_arguments(self, parser): def add_arguments(self, parser):
parser.add_argument( parser.add_argument(
@ -47,8 +43,10 @@ class Command(BaseCommand):
default=None, default=None,
type=int, type=int,
required=False, required=False,
help="Specify the ID of a document, and this command will only " help=(
"run on this specific document.", "Specify the ID of a document, and this command will only "
"run on this specific document."
),
) )
parser.add_argument( parser.add_argument(
"--no-progress-bar", "--no-progress-bar",
@ -56,6 +54,12 @@ class Command(BaseCommand):
action="store_true", action="store_true",
help="If set, the progress bar will not be shown", help="If set, the progress bar will not be shown",
) )
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options): def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR logging.getLogger().handlers[0].level = logging.ERROR
@ -72,7 +76,7 @@ class Command(BaseCommand):
# with postgres. # with postgres.
db.connections.close_all() db.connections.close_all()
with multiprocessing.Pool() as pool: with multiprocessing.Pool(processes=options["processes"]) as pool:
list( list(
tqdm.tqdm( tqdm.tqdm(
pool.imap_unordered(_process_document, ids), pool.imap_unordered(_process_document, ids),

View File

@ -1,5 +1,6 @@
import logging import logging
import os import os
from argparse import RawTextHelpFormatter
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand
@ -8,20 +9,22 @@ logger = logging.getLogger("paperless.management.superuser")
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = (
Creates a Django superuser: "Creates a Django superuser:\n"
User named: admin " User named: admin\n"
Email: root@localhost " Email: root@localhost\n"
with password based on env variable. " Password: based on env variable PAPERLESS_ADMIN_PASSWORD\n"
No superuser will be created, when: "No superuser will be created, when:\n"
- The username is taken already exists " - The username is taken already exists\n"
- A superuser already exists " - A superuser already exists\n"
- PAPERLESS_ADMIN_PASSWORD is not set " - PAPERLESS_ADMIN_PASSWORD is not set"
""".replace(
" ",
"",
) )
def create_parser(self, *args, **kwargs):
    """
    Build the argument parser for this command.

    Delegates to the parent class's ``create_parser`` with the same
    arguments, then sets the parser's formatter class to argparse's
    ``RawTextHelpFormatter`` before returning it. The command's help
    text contains explicit newlines and indentation, which the raw
    formatter is meant to keep as written.
    """
    raw_help_parser = super().create_parser(*args, **kwargs)
    # Swap the formatter on the already-built parser.
    raw_help_parser.formatter_class = RawTextHelpFormatter
    return raw_help_parser
def handle(self, *args, **options): def handle(self, *args, **options):
username = os.getenv("PAPERLESS_ADMIN_USER", "admin") username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost") mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")

View File

@ -4,11 +4,7 @@ from paperless_mail import tasks
class Command(BaseCommand): class Command(BaseCommand):
help = """ help = "Manually triggers a fetching and processing of all mail accounts"
""".replace(
" ",
"",
)
def handle(self, *args, **options): def handle(self, *args, **options):
tasks.process_mail_accounts() tasks.process_mail_accounts()