Cleans up some command help text and adds more control over the process count for commands that use a Pool

This commit is contained in:
Trenton Holmes 2023-10-28 20:36:24 -07:00 committed by Trenton H
parent 577b49df9d
commit f65f2613e9
11 changed files with 113 additions and 106 deletions

View File

@ -17,19 +17,27 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"--passphrase",
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
"specify it here",
help=(
"If PAPERLESS_PASSPHRASE isn't set already, you need to "
"specify it here"
),
)
def handle(self, *args, **options):
try:
print(
"\n\nWARNING: This script is going to work directly on your "
"document originals, so\nWARNING: you probably shouldn't run "
"this unless you've got a recent backup\nWARNING: handy. It "
self.stdout.write(
self.style.WARNING(
"\n\n"
"WARNING: This script is going to work directly on your "
"document originals, so\n"
"WARNING: you probably shouldn't run "
"this unless you've got a recent backup\n"
"WARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
"WARNING: stuff first.\n\n"
"Hit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
),
)
_ = input()
except KeyboardInterrupt:
@ -44,14 +52,13 @@ class Command(BaseCommand):
self.__gpg_to_unencrypted(passphrase)
@staticmethod
def __gpg_to_unencrypted(passphrase):
def __gpg_to_unencrypted(self, passphrase: str):
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
for document in encrypted_files:
print(f"Decrypting {document}".encode())
self.stdout.write(f"Decrypting {document}")
old_paths = [document.source_path, document.thumbnail_path]

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.archiver")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ",
"",
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
"back-tag all previously indexed documents with metadata created (or "
"modified) after their initial import."
)
def add_arguments(self, parser):
@ -30,8 +27,10 @@ class Command(BaseCommand):
"--overwrite",
default=False,
action="store_true",
help="Recreates the archived document for documents that already "
"have an archived version.",
help=(
"Recreates the archived document for documents that already "
"have an archived version."
),
)
parser.add_argument(
"-d",
@ -39,8 +38,10 @@ class Command(BaseCommand):
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
help=(
"Specify the ID of a document, and this command will only "
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
@ -48,6 +49,12 @@ class Command(BaseCommand):
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
    "--processes",
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so computing the default never raises TypeError.
    default=max(1, (os.cpu_count() or 1) // 4),
    type=int,
    help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
@ -73,7 +80,7 @@ class Command(BaseCommand):
try:
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
with multiprocessing.Pool(processes=options["processes"]) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(update_document_archive_file, document_ids),

View File

@ -4,12 +4,9 @@ from documents.tasks import train_classifier
class Command(BaseCommand):
help = """
Trains the classifier on your data and saves the resulting models to a
file. The document consumer will then automatically use this new model.
""".replace(
" ",
"",
help = (
"Trains the classifier on your data and saves the resulting models to a "
"file. The document consumer will then automatically use this new model."
)
def __init__(self, *args, **kwargs):

View File

@ -43,13 +43,10 @@ from paperless_mail.models import MailRule
class Command(BaseCommand):
help = """
Decrypt and rename all files in our collection into a given target
directory. And include a manifest file containing document data for
easy import.
""".replace(
" ",
"",
help = (
"Decrypt and rename all files in our collection into a given target "
"directory. And include a manifest file containing document data for "
"easy import."
)
def add_arguments(self, parser):
@ -60,9 +57,11 @@ class Command(BaseCommand):
"--compare-checksums",
default=False,
action="store_true",
help="Compare file checksums when determining whether to export "
help=(
"Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead.",
"modified is used instead."
),
)
parser.add_argument(
@ -70,9 +69,11 @@ class Command(BaseCommand):
"--delete",
default=False,
action="store_true",
help="After exporting, delete files in the export directory that "
help=(
"After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents.",
"deleted documents."
),
)
parser.add_argument(
@ -80,8 +81,10 @@ class Command(BaseCommand):
"--use-filename-format",
default=False,
action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured.",
help=(
"Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
),
)
parser.add_argument(
@ -105,8 +108,10 @@ class Command(BaseCommand):
"--use-folder-prefix",
default=False,
action="store_true",
help="Export files in dedicated folders according to their nature: "
"archive, originals or thumbnails",
help=(
"Export files in dedicated folders according to their nature: "
"archive, originals or thumbnails"
),
)
parser.add_argument(

View File

@ -40,12 +40,9 @@ def disable_signal(sig, receiver, sender):
class Command(BaseCommand):
help = """
Using a manifest.json file, load the data from there, and import the
documents it refers to.
""".replace(
" ",
"",
help = (
"Using a manifest.json file, load the data from there, and import the "
"documents it refers to."
)
def add_arguments(self, parser):

View File

@ -8,12 +8,7 @@ from documents.models import Document
class Command(BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(
" ",
"",
)
help = "This will rename all documents to match the latest filename format."
def add_arguments(self, parser):
parser.add_argument(

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ",
"",
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
"back-tag all previously indexed documents with metadata created (or "
"modified) after their initial import."
)
def add_arguments(self, parser):
@ -34,18 +31,22 @@ class Command(BaseCommand):
"--use-first",
default=False,
action="store_true",
help="By default this command won't try to assign a correspondent "
help=(
"By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds.",
"you'd rather it just pick the first one it finds."
),
)
parser.add_argument(
"-f",
"--overwrite",
default=False,
action="store_true",
help="If set, the document retagger will overwrite any previously"
help=(
"If set, the document retagger will overwrite any previously"
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules.",
"and tags that do not match anymore due to changed rules."
),
)
parser.add_argument(
"--no-progress-bar",

View File

@ -4,12 +4,7 @@ from documents.sanity_checker import check_sanity
class Command(BaseCommand):
help = """
This command checks your document archive for issues.
""".replace(
" ",
"",
)
help = "This command checks your document archive for issues."
def add_arguments(self, parser):
parser.add_argument(

View File

@ -1,5 +1,6 @@
import logging
import multiprocessing
import os
import shutil
import tqdm
@ -33,12 +34,7 @@ def _process_document(doc_id):
class Command(BaseCommand):
help = """
This will regenerate the thumbnails for all documents.
""".replace(
" ",
"",
)
help = "This will regenerate the thumbnails for all documents."
def add_arguments(self, parser):
parser.add_argument(
@ -47,8 +43,10 @@ class Command(BaseCommand):
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
help=(
"Specify the ID of a document, and this command will only "
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
@ -56,6 +54,12 @@ class Command(BaseCommand):
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
    "--processes",
    # os.cpu_count() returns None when the CPU count cannot be determined;
    # fall back to 1 so computing the default never raises TypeError.
    default=max(1, (os.cpu_count() or 1) // 4),
    type=int,
    help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
@ -72,7 +76,7 @@ class Command(BaseCommand):
# with postgres.
db.connections.close_all()
with multiprocessing.Pool() as pool:
with multiprocessing.Pool(processes=options["processes"]) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),

View File

@ -1,5 +1,6 @@
import logging
import os
from argparse import RawTextHelpFormatter
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
@ -8,20 +9,22 @@ logger = logging.getLogger("paperless.management.superuser")
class Command(BaseCommand):
help = """
Creates a Django superuser:
User named: admin
Email: root@localhost
with password based on env variable.
No superuser will be created, when:
- The username is taken already exists
- A superuser already exists
- PAPERLESS_ADMIN_PASSWORD is not set
""".replace(
" ",
"",
help = (
"Creates a Django superuser:\n"
" User named: admin\n"
" Email: root@localhost\n"
" Password: based on env variable PAPERLESS_ADMIN_PASSWORD\n"
"No superuser will be created, when:\n"
" - The username is taken already exists\n"
" - A superuser already exists\n"
" - PAPERLESS_ADMIN_PASSWORD is not set"
)
# Build the command's argument parser via the parent class, then swap in
# argparse's RawTextHelpFormatter, which keeps whitespace and line breaks in
# help text instead of re-wrapping it.
# NOTE(review): presumably needed so the "\n"-formatted `help` string of this
# command is shown as written — confirm against the rendered --help output.
def create_parser(self, *args, **kwargs):
parser = super().create_parser(*args, **kwargs)
# Replace the formatter class on the parser returned by the parent.
parser.formatter_class = RawTextHelpFormatter
return parser
def handle(self, *args, **options):
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")

View File

@ -4,11 +4,7 @@ from paperless_mail import tasks
class Command(BaseCommand):
help = """
""".replace(
" ",
"",
)
help = "Manually triggers a fetching and processing of all mail accounts"
def handle(self, *args, **options):
tasks.process_mail_accounts()