Adds little classes to reduce duplication of code
This commit is contained in:
parent
f65f2613e9
commit
d231dc199d
@ -7,13 +7,15 @@ from django import db
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.tasks import update_document_archive_file
|
||||
|
||||
logger = logging.getLogger("paperless.management.archiver")
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
"and document types to all documents, effectively allowing you to "
|
||||
@ -43,20 +45,13 @@ class Command(BaseCommand):
|
||||
"run on this specific document."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=max(1, os.cpu_count() // 4),
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||
|
||||
overwrite = options["overwrite"]
|
||||
@ -74,18 +69,18 @@ class Command(BaseCommand):
|
||||
)
|
||||
|
||||
# Note to future self: this prevents django from reusing database
|
||||
# conncetions between processes, which is bad and does not work
|
||||
# connections between processes, which is bad and does not work
|
||||
# with postgres.
|
||||
db.connections.close_all()
|
||||
|
||||
try:
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
||||
with multiprocessing.Pool(self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(update_document_archive_file, document_ids),
|
||||
total=len(document_ids),
|
||||
disable=options["no_progress_bar"],
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
except KeyboardInterrupt:
|
||||
|
@ -9,8 +9,5 @@ class Command(BaseCommand):
|
||||
"file. The document consumer will then automatically use this new model."
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
train_classifier()
|
||||
|
@ -7,6 +7,8 @@ import tqdm
|
||||
from django.core.management import BaseCommand
|
||||
from django.core.management import CommandError
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
||||
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = "Searches for documents where the content almost matches"
|
||||
|
||||
def add_arguments(self, parser):
|
||||
@ -51,23 +53,16 @@ class Command(BaseCommand):
|
||||
type=float,
|
||||
help="Ratio to consider documents a match",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=4,
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
RATIO_MIN: Final[float] = 0.0
|
||||
RATIO_MAX: Final[float] = 100.0
|
||||
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
opt_ratio = options["ratio"]
|
||||
checked_pairs: set[tuple[int, int]] = set()
|
||||
work_pkgs: list[_WorkPackage] = []
|
||||
@ -76,9 +71,6 @@ class Command(BaseCommand):
|
||||
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
||||
raise CommandError("The ratio must be between 0 and 100")
|
||||
|
||||
if options["processes"] < 1:
|
||||
raise CommandError("There must be at least 1 process")
|
||||
|
||||
all_docs = Document.objects.all().order_by("id")
|
||||
|
||||
# Build work packages for processing
|
||||
@ -103,7 +95,7 @@ class Command(BaseCommand):
|
||||
# Don't spin up a pool of 1 process
|
||||
if options["processes"] == 1:
|
||||
results = []
|
||||
for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
|
||||
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||
results.append(_process_and_match(work))
|
||||
else:
|
||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
||||
@ -111,7 +103,7 @@ class Command(BaseCommand):
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||
total=len(work_pkgs),
|
||||
disable=options["no_progress_bar"],
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
|
||||
|
@ -1,25 +1,22 @@
|
||||
from django.core.management import BaseCommand
|
||||
from django.db import transaction
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.tasks import index_optimize
|
||||
from documents.tasks import index_reindex
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "Manages the document index."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
with transaction.atomic():
|
||||
if options["command"] == "reindex":
|
||||
index_reindex(progress_bar_disable=options["no_progress_bar"])
|
||||
index_reindex(progress_bar_disable=self.no_progress_bar)
|
||||
elif options["command"] == "optimize":
|
||||
index_optimize()
|
||||
|
@ -4,25 +4,22 @@ import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.db.models.signals import post_save
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "This will rename all documents to match the latest filename format."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
for document in tqdm.tqdm(
|
||||
Document.objects.all(),
|
||||
disable=options["no_progress_bar"],
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
post_save.send(Document, instance=document)
|
||||
|
@ -4,6 +4,7 @@ import tqdm
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.classifier import load_classifier
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.signals.handlers import set_correspondent
|
||||
from documents.signals.handlers import set_document_type
|
||||
@ -13,7 +14,7 @@ from documents.signals.handlers import set_tags
|
||||
logger = logging.getLogger("paperless.management.retagger")
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = (
|
||||
"Using the current classification model, assigns correspondents, tags "
|
||||
"and document types to all documents, effectively allowing you to "
|
||||
@ -48,12 +49,7 @@ class Command(BaseCommand):
|
||||
"and tags that do not match anymore due to changed rules."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
parser.add_argument(
|
||||
"--suggest",
|
||||
default=False,
|
||||
@ -72,6 +68,7 @@ class Command(BaseCommand):
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
# Detect if we support color
|
||||
color = self.style.ERROR("test") != "test"
|
||||
|
||||
@ -89,7 +86,7 @@ class Command(BaseCommand):
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):
|
||||
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
|
||||
if options["correspondent"]:
|
||||
set_correspondent(
|
||||
sender=None,
|
||||
|
@ -1,20 +1,17 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.sanity_checker import check_sanity
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(ProgressBarMixin, BaseCommand):
|
||||
help = "This command checks your document archive for issues."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
messages = check_sanity(progress=not options["no_progress_bar"])
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
messages = check_sanity(progress=self.use_progress_bar)
|
||||
|
||||
messages.log_messages()
|
||||
|
@ -1,12 +1,13 @@
|
||||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import tqdm
|
||||
from django import db
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from documents.management.commands.mixins import MultiProcessMixin
|
||||
from documents.management.commands.mixins import ProgressBarMixin
|
||||
from documents.models import Document
|
||||
from documents.parsers import get_parser_class_for_mime_type
|
||||
|
||||
@ -33,7 +34,7 @@ def _process_document(doc_id):
|
||||
parser.cleanup()
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||
help = "This will regenerate the thumbnails for all documents."
|
||||
|
||||
def add_arguments(self, parser):
|
||||
@ -48,22 +49,15 @@ class Command(BaseCommand):
|
||||
"run on this specific document."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=max(1, os.cpu_count() // 4),
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
self.add_argument_progress_bar_mixin(parser)
|
||||
self.add_argument_processes_mixin(parser)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
self.handle_processes_mixin(**options)
|
||||
self.handle_progress_bar_mixin(**options)
|
||||
|
||||
if options["document"]:
|
||||
documents = Document.objects.filter(pk=options["document"])
|
||||
else:
|
||||
@ -76,11 +70,11 @@ class Command(BaseCommand):
|
||||
# with postgres.
|
||||
db.connections.close_all()
|
||||
|
||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
||||
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||
list(
|
||||
tqdm.tqdm(
|
||||
pool.imap_unordered(_process_document, ids),
|
||||
total=len(ids),
|
||||
disable=options["no_progress_bar"],
|
||||
disable=self.no_progress_bar,
|
||||
),
|
||||
)
|
||||
|
42
src/documents/management/commands/mixins.py
Normal file
42
src/documents/management/commands/mixins.py
Normal file
@ -0,0 +1,42 @@
|
||||
import os
|
||||
|
||||
from django.core.management import CommandError
|
||||
|
||||
|
||||
class MultiProcessMixin:
|
||||
"""
|
||||
Small class to handle adding an argument and validating it
|
||||
for the use of multiple processes
|
||||
"""
|
||||
|
||||
def add_argument_processes_mixin(self, parser):
|
||||
parser.add_argument(
|
||||
"--processes",
|
||||
default=max(1, os.cpu_count() // 4),
|
||||
type=int,
|
||||
help="Number of processes to distribute work amongst",
|
||||
)
|
||||
|
||||
def handle_processes_mixin(self, *args, **options):
|
||||
self.process_count = options["processes"]
|
||||
if self.process_count < 1:
|
||||
raise CommandError("There must be at least 1 process")
|
||||
|
||||
|
||||
class ProgressBarMixin:
|
||||
"""
|
||||
Many commands use a progress bar, which can be disabled
|
||||
via this class
|
||||
"""
|
||||
|
||||
def add_argument_progress_bar_mixin(self, parser):
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
|
||||
def handle_progress_bar_mixin(self, *args, **options):
|
||||
self.no_progress_bar = options["no_progress_bar"]
|
||||
self.use_progress_bar = not self.no_progress_bar
|
Loading…
x
Reference in New Issue
Block a user