Tagging by putting barcode stickers on documents (discussion #3762)
This commit is contained in:
parent
b0c305e852
commit
1dbd224b55
@ -1159,6 +1159,36 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
|
||||
|
||||
Defaults to "300"
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
||||
|
||||
: Enables the detection of barcodes in the scanned document and
|
||||
assigns or creates tags if a properly formatted barcode is detected.
|
||||
|
||||
The barcode must match one of the (configurable) regular expressions.
|
||||
If the barcode text contains ',' (comma), it is split into multiple
|
||||
barcodes which are individually processed for tagging.
|
||||
|
||||
Matching is case insensitive.
|
||||
|
||||
Defaults to false.
|
||||
|
||||
#### [`PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING=<json dict>`](#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING) {#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING}
|
||||
|
||||
: Defines a dictionary of filter regex and substitute expressions.
|
||||
|
||||
A barcode is only considered for tagging if at least one regex matches
|
||||
the barcode text. Before looking up or creating a tag, the substitute
|
||||
is applied.
|
||||
|
||||
This allows very versatile matching as well as reformatting and mapping of
|
||||
barcode patterns to tag values.
|
||||
|
||||
Syntax: {"<regex>": "<substitute>" [,...]}
|
||||
|
||||
Defaults to {"TAG:(.*)": "\\g<1>"} which includes any barcode beginning with
|
||||
TAG: followed by any number of characters. It is substituted by its name
|
||||
without the TAG: text.
|
||||
|
||||
## Audit Trail
|
||||
|
||||
#### [`PAPERLESS_AUDIT_LOG_ENABLED=<bool>`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED}
|
||||
|
@ -46,6 +46,8 @@
|
||||
#PAPERLESS_OCR_OUTPUT_TYPE=pdfa
|
||||
#PAPERLESS_OCR_PAGES=1
|
||||
#PAPERLESS_OCR_IMAGE_DPI=300
|
||||
#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=false
|
||||
#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING={"TAG:(.*)": "\\g<1>"}
|
||||
#PAPERLESS_OCR_CLEAN=clean
|
||||
#PAPERLESS_OCR_DESKEW=true
|
||||
#PAPERLESS_OCR_ROTATE_PAGES=true
|
||||
|
@ -14,6 +14,7 @@ from PIL import Image
|
||||
|
||||
from documents.converters import convert_from_tiff_to_pdf
|
||||
from documents.data_models import ConsumableDocument
|
||||
from documents.models import Tag
|
||||
from documents.plugins.base import ConsumeTaskPlugin
|
||||
from documents.plugins.base import StopConsumeTaskError
|
||||
from documents.plugins.helpers import ProgressStatusOptions
|
||||
@ -65,7 +66,9 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
supported_mimes = {"application/pdf"}
|
||||
|
||||
return (
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
|
||||
settings.CONSUMER_ENABLE_ASN_BARCODE
|
||||
or settings.CONSUMER_ENABLE_BARCODES
|
||||
or settings.CONSUMER_ENABLE_TAG_BARCODE
|
||||
) and self.input_doc.mime_type in supported_mimes
|
||||
|
||||
def setup(self):
|
||||
@ -90,6 +93,16 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
logger.info(f"Found ASN in barcode: {located_asn}")
|
||||
self.metadata.asn = located_asn
|
||||
|
||||
# try reading tags from barcodes
|
||||
if settings.CONSUMER_ENABLE_TAG_BARCODE:
|
||||
tags = self.tags
|
||||
if tags is not None and len(tags) > 0:
|
||||
if self.metadata.tag_ids:
|
||||
self.metadata.tag_ids += tags
|
||||
else:
|
||||
self.metadata.tag_ids = tags
|
||||
logger.info(f"Found tags in barcode: {tags}")
|
||||
|
||||
separator_pages = self.get_separation_pages()
|
||||
if not separator_pages:
|
||||
return "No pages to split on!"
|
||||
@ -279,6 +292,54 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
||||
|
||||
return asn
|
||||
|
||||
@property
def tags(self) -> list[int]:
    """
    Search the parsed barcodes for any tags.

    Each barcode value is split on ',' and every non-empty piece (with
    surrounding whitespace removed) is tested against the regexes in
    settings.CONSUMER_TAG_BARCODE_MAPPING using a case-insensitive
    re.match, i.e. anchored at the start of the text. The first regex
    that matches wins: if its substitute is non-empty it is applied with
    re.sub to produce the tag name, otherwise the barcode text is used
    as-is. The tag is then looked up by name (case-insensitive) and
    created if it does not exist yet.

    Returns the detected tag ids without duplicates, in order of first
    appearance (or an empty list). Never returns None.
    """
    tags: list[int] = []

    # Ensure the barcodes have been read
    self.detect()

    # The mapping does not change while iterating, so read it only once
    mappings = settings.CONSUMER_TAG_BARCODE_MAPPING.items()

    for barcode in self.barcodes:
        for piece in barcode.value.split(","):
            # re.match is anchored at the start, so a list written as
            # "TAG:a, TAG:b" would otherwise never match its second entry
            raw = piece.strip()
            if not raw:
                continue

            try:
                tag_name = None

                for regex, sub in mappings:
                    if re.match(regex, raw, flags=re.IGNORECASE):
                        tag_name = (
                            re.sub(regex, sub, raw, flags=re.IGNORECASE)
                            if sub
                            else raw
                        )
                        break

                if not tag_name:
                    continue

                tag, _ = Tag.objects.get_or_create(
                    name__iexact=tag_name,
                    defaults={"name": tag_name},
                )

                logger.debug(
                    f"Found Tag Barcode '{raw}', substituted "
                    f"to '{tag}' and mapped to "
                    f"tag #{tag.pk}.",
                )

                # The same tag may be printed on several pages/stickers
                if tag.pk not in tags:
                    tags.append(tag.pk)

            except (ValueError, re.error, Tag.MultipleObjectsReturned) as e:
                # re.error: the configured regex or substitute is invalid.
                # MultipleObjectsReturned: several existing tags differ
                # only by case. Neither is a ValueError, and a single bad
                # barcode or mapping entry should not abort consumption.
                logger.warning(
                    f"Failed to find or create TAG '{raw}' because: {e}",
                )

    return tags
|
||||
|
||||
def get_separation_pages(self) -> dict[int, bool]:
|
||||
"""
|
||||
Search the parsed barcodes for separators and returns a dict of page
|
||||
|
@ -833,6 +833,19 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
||||
|
||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
||||
|
||||
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
||||
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
||||
)
|
||||
|
||||
CONSUMER_TAG_BARCODE_MAPPING = dict(
|
||||
json.loads(
|
||||
os.getenv(
|
||||
"PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
|
||||
'{"TAG:(.*)": "\\\\g<1>"}',
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
|
||||
"PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user