Adds little classes to reduce duplication of code
This commit is contained in:
parent
f65f2613e9
commit
d231dc199d
@ -7,13 +7,15 @@ from django import db
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.tasks import update_document_archive_file
|
from documents.tasks import update_document_archive_file
|
||||||
|
|
||||||
logger = logging.getLogger("paperless.management.archiver")
|
logger = logging.getLogger("paperless.management.archiver")
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = (
|
help = (
|
||||||
"Using the current classification model, assigns correspondents, tags "
|
"Using the current classification model, assigns correspondents, tags "
|
||||||
"and document types to all documents, effectively allowing you to "
|
"and document types to all documents, effectively allowing you to "
|
||||||
@ -43,20 +45,13 @@ class Command(BaseCommand):
|
|||||||
"run on this specific document."
|
"run on this specific document."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
self.add_argument_processes_mixin(parser)
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--processes",
|
|
||||||
default=max(1, os.cpu_count() // 4),
|
|
||||||
type=int,
|
|
||||||
help="Number of processes to distribute work amongst",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
|
|
||||||
overwrite = options["overwrite"]
|
overwrite = options["overwrite"]
|
||||||
@ -74,18 +69,18 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Note to future self: this prevents django from reusing database
|
# Note to future self: this prevents django from reusing database
|
||||||
# conncetions between processes, which is bad and does not work
|
# connections between processes, which is bad and does not work
|
||||||
# with postgres.
|
# with postgres.
|
||||||
db.connections.close_all()
|
db.connections.close_all()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
with multiprocessing.Pool(self.process_count) as pool:
|
||||||
list(
|
list(
|
||||||
tqdm.tqdm(
|
tqdm.tqdm(
|
||||||
pool.imap_unordered(update_document_archive_file, document_ids),
|
pool.imap_unordered(update_document_archive_file, document_ids),
|
||||||
total=len(document_ids),
|
total=len(document_ids),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
|
@ -9,8 +9,5 @@ class Command(BaseCommand):
|
|||||||
"file. The document consumer will then automatically use this new model."
|
"file. The document consumer will then automatically use this new model."
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
train_classifier()
|
train_classifier()
|
||||||
|
@ -7,6 +7,8 @@ import tqdm
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.core.management import CommandError
|
from django.core.management import CommandError
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
@ -41,7 +43,7 @@ def _process_and_match(work: _WorkPackage) -> _WorkResult:
|
|||||||
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
return _WorkResult(work.first_doc.pk, work.second_doc.pk, match)
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = "Searches for documents where the content almost matches"
|
help = "Searches for documents where the content almost matches"
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -51,23 +53,16 @@ class Command(BaseCommand):
|
|||||||
type=float,
|
type=float,
|
||||||
help="Ratio to consider documents a match",
|
help="Ratio to consider documents a match",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--processes",
|
self.add_argument_processes_mixin(parser)
|
||||||
default=4,
|
|
||||||
type=int,
|
|
||||||
help="Number of processes to distribute work amongst",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
RATIO_MIN: Final[float] = 0.0
|
RATIO_MIN: Final[float] = 0.0
|
||||||
RATIO_MAX: Final[float] = 100.0
|
RATIO_MAX: Final[float] = 100.0
|
||||||
|
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
opt_ratio = options["ratio"]
|
opt_ratio = options["ratio"]
|
||||||
checked_pairs: set[tuple[int, int]] = set()
|
checked_pairs: set[tuple[int, int]] = set()
|
||||||
work_pkgs: list[_WorkPackage] = []
|
work_pkgs: list[_WorkPackage] = []
|
||||||
@ -76,9 +71,6 @@ class Command(BaseCommand):
|
|||||||
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
if opt_ratio < RATIO_MIN or opt_ratio > RATIO_MAX:
|
||||||
raise CommandError("The ratio must be between 0 and 100")
|
raise CommandError("The ratio must be between 0 and 100")
|
||||||
|
|
||||||
if options["processes"] < 1:
|
|
||||||
raise CommandError("There must be at least 1 process")
|
|
||||||
|
|
||||||
all_docs = Document.objects.all().order_by("id")
|
all_docs = Document.objects.all().order_by("id")
|
||||||
|
|
||||||
# Build work packages for processing
|
# Build work packages for processing
|
||||||
@ -103,7 +95,7 @@ class Command(BaseCommand):
|
|||||||
# Don't spin up a pool of 1 process
|
# Don't spin up a pool of 1 process
|
||||||
if options["processes"] == 1:
|
if options["processes"] == 1:
|
||||||
results = []
|
results = []
|
||||||
for work in tqdm.tqdm(work_pkgs, disable=options["no_progress_bar"]):
|
for work in tqdm.tqdm(work_pkgs, disable=self.no_progress_bar):
|
||||||
results.append(_process_and_match(work))
|
results.append(_process_and_match(work))
|
||||||
else:
|
else:
|
||||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
||||||
@ -111,7 +103,7 @@ class Command(BaseCommand):
|
|||||||
tqdm.tqdm(
|
tqdm.tqdm(
|
||||||
pool.imap_unordered(_process_and_match, work_pkgs),
|
pool.imap_unordered(_process_and_match, work_pkgs),
|
||||||
total=len(work_pkgs),
|
total=len(work_pkgs),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -1,25 +1,22 @@
|
|||||||
from django.core.management import BaseCommand
|
from django.core.management import BaseCommand
|
||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.tasks import index_optimize
|
from documents.tasks import index_optimize
|
||||||
from documents.tasks import index_reindex
|
from documents.tasks import index_reindex
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = "Manages the document index."
|
help = "Manages the document index."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("command", choices=["reindex", "optimize"])
|
parser.add_argument("command", choices=["reindex", "optimize"])
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
if options["command"] == "reindex":
|
if options["command"] == "reindex":
|
||||||
index_reindex(progress_bar_disable=options["no_progress_bar"])
|
index_reindex(progress_bar_disable=self.no_progress_bar)
|
||||||
elif options["command"] == "optimize":
|
elif options["command"] == "optimize":
|
||||||
index_optimize()
|
index_optimize()
|
||||||
|
@ -4,25 +4,22 @@ import tqdm
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from django.db.models.signals import post_save
|
from django.db.models.signals import post_save
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = "This will rename all documents to match the latest filename format."
|
help = "This will rename all documents to match the latest filename format."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
for document in tqdm.tqdm(
|
for document in tqdm.tqdm(
|
||||||
Document.objects.all(),
|
Document.objects.all(),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
):
|
):
|
||||||
post_save.send(Document, instance=document)
|
post_save.send(Document, instance=document)
|
||||||
|
@ -4,6 +4,7 @@ import tqdm
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
from documents.classifier import load_classifier
|
from documents.classifier import load_classifier
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.signals.handlers import set_correspondent
|
from documents.signals.handlers import set_correspondent
|
||||||
from documents.signals.handlers import set_document_type
|
from documents.signals.handlers import set_document_type
|
||||||
@ -13,7 +14,7 @@ from documents.signals.handlers import set_tags
|
|||||||
logger = logging.getLogger("paperless.management.retagger")
|
logger = logging.getLogger("paperless.management.retagger")
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = (
|
help = (
|
||||||
"Using the current classification model, assigns correspondents, tags "
|
"Using the current classification model, assigns correspondents, tags "
|
||||||
"and document types to all documents, effectively allowing you to "
|
"and document types to all documents, effectively allowing you to "
|
||||||
@ -48,12 +49,7 @@ class Command(BaseCommand):
|
|||||||
"and tags that do not match anymore due to changed rules."
|
"and tags that do not match anymore due to changed rules."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--suggest",
|
"--suggest",
|
||||||
default=False,
|
default=False,
|
||||||
@ -72,6 +68,7 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
# Detect if we support color
|
# Detect if we support color
|
||||||
color = self.style.ERROR("test") != "test"
|
color = self.style.ERROR("test") != "test"
|
||||||
|
|
||||||
@ -89,7 +86,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):
|
for document in tqdm.tqdm(documents, disable=self.no_progress_bar):
|
||||||
if options["correspondent"]:
|
if options["correspondent"]:
|
||||||
set_correspondent(
|
set_correspondent(
|
||||||
sender=None,
|
sender=None,
|
||||||
|
@ -1,20 +1,17 @@
|
|||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.sanity_checker import check_sanity
|
from documents.sanity_checker import check_sanity
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(ProgressBarMixin, BaseCommand):
|
||||||
help = "This command checks your document archive for issues."
|
help = "This command checks your document archive for issues."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
messages = check_sanity(progress=not options["no_progress_bar"])
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
messages = check_sanity(progress=self.use_progress_bar)
|
||||||
|
|
||||||
messages.log_messages()
|
messages.log_messages()
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import os
|
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
import tqdm
|
import tqdm
|
||||||
from django import db
|
from django import db
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from documents.management.commands.mixins import MultiProcessMixin
|
||||||
|
from documents.management.commands.mixins import ProgressBarMixin
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
from documents.parsers import get_parser_class_for_mime_type
|
from documents.parsers import get_parser_class_for_mime_type
|
||||||
|
|
||||||
@ -33,7 +34,7 @@ def _process_document(doc_id):
|
|||||||
parser.cleanup()
|
parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
class Command(BaseCommand):
|
class Command(MultiProcessMixin, ProgressBarMixin, BaseCommand):
|
||||||
help = "This will regenerate the thumbnails for all documents."
|
help = "This will regenerate the thumbnails for all documents."
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
@ -48,22 +49,15 @@ class Command(BaseCommand):
|
|||||||
"run on this specific document."
|
"run on this specific document."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
self.add_argument_progress_bar_mixin(parser)
|
||||||
"--no-progress-bar",
|
self.add_argument_processes_mixin(parser)
|
||||||
default=False,
|
|
||||||
action="store_true",
|
|
||||||
help="If set, the progress bar will not be shown",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--processes",
|
|
||||||
default=max(1, os.cpu_count() // 4),
|
|
||||||
type=int,
|
|
||||||
help="Number of processes to distribute work amongst",
|
|
||||||
)
|
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
|
self.handle_processes_mixin(**options)
|
||||||
|
self.handle_progress_bar_mixin(**options)
|
||||||
|
|
||||||
if options["document"]:
|
if options["document"]:
|
||||||
documents = Document.objects.filter(pk=options["document"])
|
documents = Document.objects.filter(pk=options["document"])
|
||||||
else:
|
else:
|
||||||
@ -76,11 +70,11 @@ class Command(BaseCommand):
|
|||||||
# with postgres.
|
# with postgres.
|
||||||
db.connections.close_all()
|
db.connections.close_all()
|
||||||
|
|
||||||
with multiprocessing.Pool(processes=options["processes"]) as pool:
|
with multiprocessing.Pool(processes=self.process_count) as pool:
|
||||||
list(
|
list(
|
||||||
tqdm.tqdm(
|
tqdm.tqdm(
|
||||||
pool.imap_unordered(_process_document, ids),
|
pool.imap_unordered(_process_document, ids),
|
||||||
total=len(ids),
|
total=len(ids),
|
||||||
disable=options["no_progress_bar"],
|
disable=self.no_progress_bar,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
42
src/documents/management/commands/mixins.py
Normal file
42
src/documents/management/commands/mixins.py
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from django.core.management import CommandError
|
||||||
|
|
||||||
|
|
||||||
|
class MultiProcessMixin:
    """
    Small class to handle adding an argument and validating it
    for the use of multiple processes
    """

    def add_argument_processes_mixin(self, parser):
        """
        Register the ``--processes`` integer option on ``parser``.

        The default is a quarter of the reported CPU count, with a floor
        of 1.
        """
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to 1 so computing the default cannot raise TypeError.
        cpu_total = os.cpu_count() or 1
        parser.add_argument(
            "--processes",
            default=max(1, cpu_total // 4),
            type=int,
            help="Number of processes to distribute work amongst",
        )

    def handle_processes_mixin(self, *args, **options):
        """
        Store ``options["processes"]`` on ``self.process_count`` and
        validate it.

        Raises:
            CommandError: if the requested process count is below 1.
        """
        # The attribute is assigned before validation, matching the
        # original ordering.
        self.process_count = options["processes"]
        if self.process_count < 1:
            raise CommandError("There must be at least 1 process")
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressBarMixin:
    """
    Many commands use a progress bar, which can be disabled
    via this class
    """

    def add_argument_progress_bar_mixin(self, parser):
        """Register the ``--no-progress-bar`` boolean flag on ``parser``."""
        flag_settings = {
            "default": False,
            "action": "store_true",
            "help": "If set, the progress bar will not be shown",
        }
        parser.add_argument("--no-progress-bar", **flag_settings)

    def handle_progress_bar_mixin(self, *args, **options):
        """
        Copy the parsed flag onto the instance.

        Sets ``self.no_progress_bar`` to ``options["no_progress_bar"]`` and
        ``self.use_progress_bar`` to its negation.
        """
        disabled = options["no_progress_bar"]
        self.no_progress_bar = disabled
        self.use_progress_bar = not disabled
|
Loading…
x
Reference in New Issue
Block a user