Introduce new document_retagger option

This commit is contained in:
Kamil Kosek 2023-08-28 20:41:56 +02:00 committed by shamoon
parent a8e13df249
commit 73c9ee395b
2 changed files with 19 additions and 1 deletions

View File

@ -351,7 +351,7 @@ currently-imported docs. This problem is common enough that there are
tools for it. tools for it.
``` ```
document_retagger [-h] [-c] [-T] [-t] [-i] [--use-first] [-f] document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
optional arguments: optional arguments:
-c, --correspondent -c, --correspondent
@ -359,6 +359,7 @@ optional arguments:
-t, --document_type -t, --document_type
-s, --storage_path -s, --storage_path
-i, --inbox-only -i, --inbox-only
--id-range
--use-first --use-first
-f, --overwrite -f, --overwrite
``` ```
@ -375,6 +376,11 @@ Specify `-i` to have the document retagger work on documents tagged with
inbox tags only. This is useful when you don't want to mess with your inbox tags only. This is useful when you don't want to mess with your
already processed documents. already processed documents.
Specify `--id-range 1 100` to have the document retagger work only on
documents whose ID falls within that range (both bounds are inclusive).
This can be useful if you have a lot of documents and want to test the
matching rules on only a subset of them.
When multiple document types or correspondents match a single document, When multiple document types or correspondents match a single document,
the retagger won't assign these to the document. Specify `--use-first` the retagger won't assign these to the document. Specify `--use-first`
to override this behavior and just use the first correspondent or type to override this behavior and just use the first correspondent or type

View File

@ -63,6 +63,12 @@ class Command(BaseCommand):
"--base-url", "--base-url",
help="The base URL to use to build the link to the documents.", help="The base URL to use to build the link to the documents.",
) )
parser.add_argument(
"--id-range",
help="A range of document id's on which the retagging should be applied.",
nargs=2,
type=int,
)
def handle(self, *args, **options): def handle(self, *args, **options):
# Detect if we support color # Detect if we support color
@ -72,6 +78,12 @@ class Command(BaseCommand):
queryset = Document.objects.filter(tags__is_inbox_tag=True) queryset = Document.objects.filter(tags__is_inbox_tag=True)
else: else:
queryset = Document.objects.all() queryset = Document.objects.all()
if options["id_range"]:
queryset = queryset.filter(
id__range=(options["id_range"][0], options["id_range"][1]),
)
documents = queryset.distinct() documents = queryset.distinct()
classifier = load_classifier() classifier = load_classifier()