Cleans up some command help text and adds more control over the process count for commands that use a Pool

This commit is contained in:
Trenton Holmes 2023-10-28 20:36:24 -07:00 committed by Trenton H
parent 577b49df9d
commit f65f2613e9
11 changed files with 113 additions and 106 deletions

View File

@ -17,19 +17,27 @@ class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
"--passphrase",
help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
"specify it here",
help=(
"If PAPERLESS_PASSPHRASE isn't set already, you need to "
"specify it here"
),
)
def handle(self, *args, **options):
try:
print(
"\n\nWARNING: This script is going to work directly on your "
"document originals, so\nWARNING: you probably shouldn't run "
"this unless you've got a recent backup\nWARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
self.stdout.write(
self.style.WARNING(
"\n\n"
"WARNING: This script is going to work directly on your "
"document originals, so\n"
"WARNING: you probably shouldn't run "
"this unless you've got a recent backup\n"
"WARNING: handy. It "
"*should* work without a hitch, but be safe and backup your\n"
"WARNING: stuff first.\n\n"
"Hit Ctrl+C to exit now, or Enter to "
"continue.\n\n",
),
)
_ = input()
except KeyboardInterrupt:
@ -44,14 +52,13 @@ class Command(BaseCommand):
self.__gpg_to_unencrypted(passphrase)
@staticmethod
def __gpg_to_unencrypted(passphrase):
def __gpg_to_unencrypted(self, passphrase: str):
encrypted_files = Document.objects.filter(
storage_type=Document.STORAGE_TYPE_GPG,
)
for document in encrypted_files:
print(f"Decrypting {document}".encode())
self.stdout.write(f"Decrypting {document}")
old_paths = [document.source_path, document.thumbnail_path]

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.archiver")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ",
"",
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
"back-tag all previously indexed documents with metadata created (or "
"modified) after their initial import."
)
def add_arguments(self, parser):
@ -30,8 +27,10 @@ class Command(BaseCommand):
"--overwrite",
default=False,
action="store_true",
help="Recreates the archived document for documents that already "
"have an archived version.",
help=(
"Recreates the archived document for documents that already "
"have an archived version."
),
)
parser.add_argument(
"-d",
@ -39,8 +38,10 @@ class Command(BaseCommand):
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
help=(
"Specify the ID of a document, and this command will only "
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
@ -48,6 +49,12 @@ class Command(BaseCommand):
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options):
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
@ -73,7 +80,7 @@ class Command(BaseCommand):
try:
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
with multiprocessing.Pool(processes=options["processes"]) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(update_document_archive_file, document_ids),

View File

@ -4,12 +4,9 @@ from documents.tasks import train_classifier
class Command(BaseCommand):
help = """
Trains the classifier on your data and saves the resulting models to a
file. The document consumer will then automatically use this new model.
""".replace(
" ",
"",
help = (
"Trains the classifier on your data and saves the resulting models to a "
"file. The document consumer will then automatically use this new model."
)
def __init__(self, *args, **kwargs):

View File

@ -43,13 +43,10 @@ from paperless_mail.models import MailRule
class Command(BaseCommand):
help = """
Decrypt and rename all files in our collection into a given target
directory. And include a manifest file containing document data for
easy import.
""".replace(
" ",
"",
help = (
"Decrypt and rename all files in our collection into a given target "
"directory. And include a manifest file containing document data for "
"easy import."
)
def add_arguments(self, parser):
@ -60,9 +57,11 @@ class Command(BaseCommand):
"--compare-checksums",
default=False,
action="store_true",
help="Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead.",
help=(
"Compare file checksums when determining whether to export "
"a file or not. If not specified, file size and time "
"modified is used instead."
),
)
parser.add_argument(
@ -70,9 +69,11 @@ class Command(BaseCommand):
"--delete",
default=False,
action="store_true",
help="After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents.",
help=(
"After exporting, delete files in the export directory that "
"do not belong to the current export, such as files from "
"deleted documents."
),
)
parser.add_argument(
@ -80,8 +81,10 @@ class Command(BaseCommand):
"--use-filename-format",
default=False,
action="store_true",
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured.",
help=(
"Use PAPERLESS_FILENAME_FORMAT for storing files in the "
"export directory, if configured."
),
)
parser.add_argument(
@ -105,8 +108,10 @@ class Command(BaseCommand):
"--use-folder-prefix",
default=False,
action="store_true",
help="Export files in dedicated folders according to their nature: "
"archive, originals or thumbnails",
help=(
"Export files in dedicated folders according to their nature: "
"archive, originals or thumbnails"
),
)
parser.add_argument(

View File

@ -40,12 +40,9 @@ def disable_signal(sig, receiver, sender):
class Command(BaseCommand):
help = """
Using a manifest.json file, load the data from there, and import the
documents it refers to.
""".replace(
" ",
"",
help = (
"Using a manifest.json file, load the data from there, and import the "
"documents it refers to."
)
def add_arguments(self, parser):

View File

@ -8,12 +8,7 @@ from documents.models import Document
class Command(BaseCommand):
help = """
This will rename all documents to match the latest filename format.
""".replace(
" ",
"",
)
help = "This will rename all documents to match the latest filename format."
def add_arguments(self, parser):
parser.add_argument(

View File

@ -14,14 +14,11 @@ logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand):
help = """
Using the current classification model, assigns correspondents, tags
and document types to all documents, effectively allowing you to
back-tag all previously indexed documents with metadata created (or
modified) after their initial import.
""".replace(
" ",
"",
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
"back-tag all previously indexed documents with metadata created (or "
"modified) after their initial import."
)
def add_arguments(self, parser):
@ -34,18 +31,22 @@ class Command(BaseCommand):
"--use-first",
default=False,
action="store_true",
help="By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds.",
help=(
"By default this command won't try to assign a correspondent "
"if more than one matches the document. Use this flag if "
"you'd rather it just pick the first one it finds."
),
)
parser.add_argument(
"-f",
"--overwrite",
default=False,
action="store_true",
help="If set, the document retagger will overwrite any previously"
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules.",
help=(
"If set, the document retagger will overwrite any previously"
"set correspondent, document and remove correspondents, types"
"and tags that do not match anymore due to changed rules."
),
)
parser.add_argument(
"--no-progress-bar",

View File

@ -4,12 +4,7 @@ from documents.sanity_checker import check_sanity
class Command(BaseCommand):
help = """
This command checks your document archive for issues.
""".replace(
" ",
"",
)
help = "This command checks your document archive for issues."
def add_arguments(self, parser):
parser.add_argument(

View File

@ -1,5 +1,6 @@
import logging
import multiprocessing
import os
import shutil
import tqdm
@ -33,12 +34,7 @@ def _process_document(doc_id):
class Command(BaseCommand):
help = """
This will regenerate the thumbnails for all documents.
""".replace(
" ",
"",
)
help = "This will regenerate the thumbnails for all documents."
def add_arguments(self, parser):
parser.add_argument(
@ -47,8 +43,10 @@ class Command(BaseCommand):
default=None,
type=int,
required=False,
help="Specify the ID of a document, and this command will only "
"run on this specific document.",
help=(
"Specify the ID of a document, and this command will only "
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
@ -56,6 +54,12 @@ class Command(BaseCommand):
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
@ -72,7 +76,7 @@ class Command(BaseCommand):
# with postgres.
db.connections.close_all()
with multiprocessing.Pool() as pool:
with multiprocessing.Pool(processes=options["processes"]) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),

View File

@ -1,5 +1,6 @@
import logging
import os
from argparse import RawTextHelpFormatter
from django.contrib.auth.models import User
from django.core.management.base import BaseCommand
@ -8,20 +9,22 @@ logger = logging.getLogger("paperless.management.superuser")
class Command(BaseCommand):
help = """
Creates a Django superuser:
User named: admin
Email: root@localhost
with password based on env variable.
No superuser will be created, when:
- The username is taken already exists
- A superuser already exists
- PAPERLESS_ADMIN_PASSWORD is not set
""".replace(
" ",
"",
help = (
"Creates a Django superuser:\n"
" User named: admin\n"
" Email: root@localhost\n"
" Password: based on env variable PAPERLESS_ADMIN_PASSWORD\n"
"No superuser will be created, when:\n"
" - The username is taken already exists\n"
" - A superuser already exists\n"
" - PAPERLESS_ADMIN_PASSWORD is not set"
)
def create_parser(self, *args, **kwargs):
parser = super().create_parser(*args, **kwargs)
parser.formatter_class = RawTextHelpFormatter
return parser
def handle(self, *args, **options):
username = os.getenv("PAPERLESS_ADMIN_USER", "admin")
mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")

View File

@ -4,11 +4,7 @@ from paperless_mail import tasks
class Command(BaseCommand):
help = """
""".replace(
" ",
"",
)
help = "Manually triggers a fetching and processing of all mail accounts"
def handle(self, *args, **options):
tasks.process_mail_accounts()