Introduce new document_retagger option

This commit is contained in:
Kamil Kosek 2023-08-28 20:41:56 +02:00 committed by shamoon
parent a8e13df249
commit 73c9ee395b
2 changed files with 19 additions and 1 deletions

View File

@ -351,7 +351,7 @@ currently-imported docs. This problem is common enough that there are
tools for it.
```
document_retagger [-h] [-c] [-T] [-t] [-i] [--use-first] [-f]
document_retagger [-h] [-c] [-T] [-t] [-i] [--id-range] [--use-first] [-f]
optional arguments:
-c, --correspondent
@ -359,6 +359,7 @@ optional arguments:
-t, --document_type
-s, --storage_path
-i, --inbox-only
--id-range
--use-first
-f, --overwrite
```
@ -375,6 +376,11 @@ Specify `-i` to have the document retagger work on documents tagged with
inbox tags only. This is useful when you don't want to mess with your
already processed documents.
Specify `--id-range 1 100` to have the document retagger work only on a
specific range of document IDs (both ends of the range are inclusive).
This can be useful if you have a lot of documents and want to test the
matching rules only on a subset of documents.
When multiple document types or correspondents match a single document,
the retagger won't assign these to the document. Specify `--use-first`
to override this behavior and just use the first correspondent or type

View File

@ -63,6 +63,12 @@ class Command(BaseCommand):
"--base-url",
help="The base URL to use to build the link to the documents.",
)
parser.add_argument(
"--id-range",
help="A range of document id's on which the retagging should be applied.",
nargs=2,
type=int,
)
def handle(self, *args, **options):
# Detect if we support color
@ -72,6 +78,12 @@ class Command(BaseCommand):
queryset = Document.objects.filter(tags__is_inbox_tag=True)
else:
queryset = Document.objects.all()
if options["id_range"]:
queryset = queryset.filter(
id__range=(options["id_range"][0], options["id_range"][1]),
)
documents = queryset.distinct()
classifier = load_classifier()