Adds little classes to reduce duplication of code

This commit is contained in:
Trenton H 2023-11-01 15:22:52 -07:00
parent f65f2613e9
commit d231dc199d
9 changed files with 93 additions and 85 deletions

View File

@ -7,13 +7,15 @@ from django import db
from django.conf import settings
from django.core.management.base import BaseCommand
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.tasks import update_document_archive_file
logger = logging.getLogger("paperless.management.archiver")
class Command(BaseCommand):
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
@ -43,20 +45,13 @@ class Command(BaseCommand):
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
self.add_argument_progress_bar_mixin(parser)
self.add_argument_processes_mixin(parser)
def handle(self, *args, **options):
self.handle_processes_mixin(**options)
self.handle_progress_bar_mixin(**options)
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
overwrite = options["overwrite"]
@ -74,18 +69,18 @@ class Command(BaseCommand):
)
# Note to future self: this prevents django from reusing database
# conncetions between processes, which is bad and does not work
# connections between processes, which is bad and does not work
# with postgres.
db.connections.close_all()
try:
logging.getLogger().handlers[0].level = logging.ERROR
with multiprocessing.Pool(processes=options["processes"]) as pool:
with multiprocessing.Pool(self.process_count) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(update_document_archive_file, document_ids),
total=len(document_ids),
disable=options["no_progress_bar"],
disable=self.no_progress_bar,
),
)
except KeyboardInterrupt:

View File

@ -9,8 +9,5 @@ class Command(BaseCommand):
"file. The document consumer will then automatically use this new model."
)
def __init__(self, *args, **kwargs):
BaseCommand.__init__(self, *args, **kwargs)
def handle(self, *args, **options):
train_classifier()

View File

@ -7,6 +7,8 @@ import tqdm
from django.core.management import BaseCommand
from django.core.management import CommandError
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
class Command(BaseCommand):
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
help = "Searches for documents where the content almost matches"
def add_arguments(self, parser):
@ -51,23 +53,16 @@ class Command(BaseCommand):
type=float,
help="Ratio to consider documents a match",
)
parser.add_argument(
"--processes",
default=4,
type=int,
help="Number of processes to distribute work amongst",
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
self.add_argument_progress_bar_mixin(parser)
self.add_argument_processes_mixin(parser)
def handle(self, *args, **options):
RATIO_MIN: Final[float] = 0.0
RATIO_MAX: Final[float] = 100.0
self.handle_processes_mixin(**options)
self.handle_progress_bar_mixin(**options)
opt_ratio = options["ratio"]
checked_pairs: set[tuple[int, int]] = set()
work_pkgs: list[_WorkPackage] = []
@ -76,9 +71,6 @@ class Command(BaseCommand):
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
raise CommandError("The ratio must be between 0 and 100")
if options["processes"] < 1:
raise CommandError("There must be at least 1 process")
all_docs = Document.objects.all().order_by("id")
# Build work packages for processing
@ -103,7 +95,7 @@ class Command(BaseCommand):
# Don't spin up a pool of 1 process
if options["processes"] == 1:
results = []
for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
results.append(_process_and_match(work))
else:
with multiprocessing.Pool(processes=options["processes"]) as pool:
@ -111,7 +103,7 @@ class Command(BaseCommand):
tqdm.tqdm(
pool.imap_unordered(_process_and_match, work_pkgs),
total=len(work_pkgs),
disable=options["no_progress_bar"],
disable=self.no_progress_bar,
),
)

View File

@ -1,25 +1,22 @@
from django.core.management import BaseCommand
from django.db import transaction
from documents.management.commands.mixins import ProgressBarMixin
from documents.tasks import index_optimize
from documents.tasks import index_reindex
class Command(BaseCommand):
class Command(ProgressBarMixin, BaseCommand):
help = "Manages the document index."
def add_arguments(self, parser):
parser.add_argument("command", choices=["reindex", "optimize"])
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
with transaction.atomic():
if options["command"] == "reindex":
index_reindex(progress_bar_disable=options["no_progress_bar"])
index_reindex(progress_bar_disable=self.no_progress_bar)
elif options["command"] == "optimize":
index_optimize()

View File

@ -4,25 +4,22 @@ import tqdm
from django.core.management.base import BaseCommand
from django.db.models.signals import post_save
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
class Command(BaseCommand):
class Command(ProgressBarMixin, BaseCommand):
help = "This will rename all documents to match the latest filename format."
def add_arguments(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
logging.getLogger().handlers[0].level = logging.ERROR
for document in tqdm.tqdm(
Document.objects.all(),
disable=options["no_progress_bar"],
disable=self.no_progress_bar,
):
post_save.send(Document, instance=document)

View File

@ -4,6 +4,7 @@ import tqdm
from django.core.management.base import BaseCommand
from documents.classifier import load_classifier
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.signals.handlers import set_correspondent
from documents.signals.handlers import set_document_type
@ -13,7 +14,7 @@ from documents.signals.handlers import set_tags
logger = logging.getLogger("paperless.management.retagger")
class Command(BaseCommand):
class Command(ProgressBarMixin, BaseCommand):
help = (
"Using the current classification model, assigns correspondents, tags "
"and document types to all documents, effectively allowing you to "
@ -48,12 +49,7 @@ class Command(BaseCommand):
"and tags that do not match anymore due to changed rules."
),
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
self.add_argument_progress_bar_mixin(parser)
parser.add_argument(
"--suggest",
default=False,
@ -72,6 +68,7 @@ class Command(BaseCommand):
)
def handle(self, *args, **options):
self.handle_progress_bar_mixin(**options)
# Detect if we support color
color = self.style.ERROR("test") != "test"
@ -89,7 +86,7 @@ class Command(BaseCommand):
classifier = load_classifier()
for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
if options["correspondent"]:
set_correspondent(
sender=None,

View File

@ -1,20 +1,17 @@
from django.core.management.base import BaseCommand
from documents.management.commands.mixins import ProgressBarMixin
from documents.sanity_checker import check_sanity
class Command(BaseCommand):
class Command(ProgressBarMixin, BaseCommand):
help = "This command checks your document archive for issues."
def add_arguments(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
self.add_argument_progress_bar_mixin(parser)
def handle(self, *args, **options):
messages = check_sanity(progress=not options["no_progress_bar"])
self.handle_progress_bar_mixin(**options)
messages = check_sanity(progress=self.use_progress_bar)
messages.log_messages()

View File

@ -1,12 +1,13 @@
import logging
import multiprocessing
import os
import shutil
import tqdm
from django import db
from django.core.management.base import BaseCommand
from documents.management.commands.mixins import MultiProcessMixin
from documents.management.commands.mixins import ProgressBarMixin
from documents.models import Document
from documents.parsers import get_parser_class_for_mime_type
@ -33,7 +34,7 @@ def _process_document(doc_id):
parser.cleanup()
class Command(BaseCommand):
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
help = "This will regenerate the thumbnails for all documents."
def add_arguments(self, parser):
@ -48,22 +49,15 @@ class Command(BaseCommand):
"run on this specific document."
),
)
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
self.add_argument_progress_bar_mixin(parser)
self.add_argument_processes_mixin(parser)
def handle(self, *args, **options):
logging.getLogger().handlers[0].level = logging.ERROR
self.handle_processes_mixin(**options)
self.handle_progress_bar_mixin(**options)
if options["document"]:
documents = Document.objects.filter(pk=options["document"])
else:
@ -76,11 +70,11 @@ class Command(BaseCommand):
# with postgres.
db.connections.close_all()
with multiprocessing.Pool(processes=options["processes"]) as pool:
with multiprocessing.Pool(processes=self.process_count) as pool:
list(
tqdm.tqdm(
pool.imap_unordered(_process_document, ids),
total=len(ids),
disable=options["no_progress_bar"],
disable=self.no_progress_bar,
),
)

View File

@ -0,0 +1,42 @@
import os
from django.core.management import CommandError
class MultiProcessMixin:
"""
Small class to handle adding an argument and validating it
for the use of multiple processes
"""
def add_argument_processes_mixin(self, parser):
parser.add_argument(
"--processes",
default=max(1, os.cpu_count() // 4),
type=int,
help="Number of processes to distribute work amongst",
)
def handle_processes_mixin(self, *args, **options):
self.process_count = options["processes"]
if self.process_count < 1:
raise CommandError("There must be at least 1 process")
class ProgressBarMixin:
"""
Many commands use a progress bar, which can be disabled
via this class
"""
def add_argument_progress_bar_mixin(self, parser):
parser.add_argument(
"--no-progress-bar",
default=False,
action="store_true",
help="If set, the progress bar will not be shown",
)
def handle_progress_bar_mixin(self, *args, **options):
self.no_progress_bar = options["no_progress_bar"]
self.use_progress_bar = not self.no_progress_bar