diff --git a/docs/configuration.md b/docs/configuration.md index b68198619..94dbef77e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1159,6 +1159,36 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0. Defaults to "300" +#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE} + +: Enables the detection of barcodes in the scanned document and +assigns or creates tags if a properly formatted barcode is detected. + + The barcode must match one of the (configurable) regular expressions. + If the barcode text contains ',' (comma), it is split into multiple + barcodes which are individually processed for tagging. + + Matching is case-insensitive. + + Defaults to false. + +#### [`PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING=`](#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING) {#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING} + +: Defines a dictionary of filter regexes and substitute expressions. + + A barcode is only considered for tagging if at least one regex matches + the barcode text. Before looking up or creating a tag, the substitute + is applied. + + This allows very versatile matching as well as reformatting and mapping of + barcode patterns to tag values. + + Syntax: `{"<regex>": "<substitute>" [, ...]}` + + Defaults to `{"TAG:(.*)": "\\g<1>"}` which includes any barcode beginning with + `TAG:` followed by any number of characters. It is substituted by its name + without the `TAG:` prefix. 
+ ## Audit Trail #### [`PAPERLESS_AUDIT_LOG_ENABLED=`](#PAPERLESS_AUDIT_LOG_ENABLED) {#PAPERLESS_AUDIT_LOG_ENABLED} diff --git a/paperless.conf.example b/paperless.conf.example index 1610dcda9..ecb7c262a 100644 --- a/paperless.conf.example +++ b/paperless.conf.example @@ -46,6 +46,8 @@ #PAPERLESS_OCR_OUTPUT_TYPE=pdfa #PAPERLESS_OCR_PAGES=1 #PAPERLESS_OCR_IMAGE_DPI=300 +#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=false +#PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING={"TAG:(.*)": "\\g<1>"} #PAPERLESS_OCR_CLEAN=clean #PAPERLESS_OCR_DESKEW=true #PAPERLESS_OCR_ROTATE_PAGES=true diff --git a/src/documents/barcodes.py b/src/documents/barcodes.py index 606451f84..61667715f 100644 --- a/src/documents/barcodes.py +++ b/src/documents/barcodes.py @@ -14,6 +14,7 @@ from PIL import Image from documents.converters import convert_from_tiff_to_pdf from documents.data_models import ConsumableDocument +from documents.models import Tag from documents.plugins.base import ConsumeTaskPlugin from documents.plugins.base import StopConsumeTaskError from documents.plugins.helpers import ProgressStatusOptions @@ -65,7 +66,9 @@ class BarcodePlugin(ConsumeTaskPlugin): supported_mimes = {"application/pdf"} return ( - settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES + settings.CONSUMER_ENABLE_ASN_BARCODE + or settings.CONSUMER_ENABLE_BARCODES + or settings.CONSUMER_ENABLE_TAG_BARCODE ) and self.input_doc.mime_type in supported_mimes def setup(self): @@ -90,6 +93,16 @@ class BarcodePlugin(ConsumeTaskPlugin): logger.info(f"Found ASN in barcode: {located_asn}") self.metadata.asn = located_asn + # try reading tags from barcodes + if settings.CONSUMER_ENABLE_TAG_BARCODE: + tags = self.tags + if tags is not None and len(tags) > 0: + if self.metadata.tag_ids: + self.metadata.tag_ids += tags + else: + self.metadata.tag_ids = tags + logger.info(f"Found tags in barcode: {tags}") + separator_pages = self.get_separation_pages() if not separator_pages: return "No pages to split on!" 
@@ -279,6 +292,71 @@ class BarcodePlugin(ConsumeTaskPlugin):
 
         return asn
 
+    @property
+    def tags(self) -> list[int]:
+        """
+        Search the parsed barcodes for any tags, creating missing tags.
+
+        Every barcode value is split on "," and each piece is tested against
+        the regexes of settings.CONSUMER_TAG_BARCODE_MAPPING, ignoring case.
+        The first matching regex wins and its substitute, unless empty, is
+        applied to get the tag name. Pieces which fail are logged and skipped.
+
+        Returns the detected tag ids without duplicates (or empty list)
+        """
+        tags: list[int] = []
+
+        # Ensure the barcodes have been read
+        self.detect()
+
+        # The mapping is the same for every barcode, so look it up only once
+        mappings = settings.CONSUMER_TAG_BARCODE_MAPPING.items()
+
+        for barcode in self.barcodes:
+            for piece in barcode.value.split(","):
+                # Tolerate blanks around the separator, e.g. "TAG:a, TAG:b"
+                raw = piece.strip()
+                if not raw:
+                    continue
+
+                try:
+                    tag_name = None
+                    for regex, sub in mappings:
+                        if re.match(regex, raw, flags=re.IGNORECASE):
+                            tag_name = (
+                                re.sub(regex, sub, raw, flags=re.IGNORECASE)
+                                if sub
+                                else raw
+                            )
+                            break
+
+                    if not tag_name:
+                        continue
+
+                    tag, _ = Tag.objects.get_or_create(
+                        name__iexact=tag_name,
+                        defaults={"name": tag_name},
+                    )
+                except (ValueError, re.error, Tag.MultipleObjectsReturned) as e:
+                    # re.error: broken regex or substitute in the mapping;
+                    # MultipleObjectsReturned: tags differing only by case.
+                    # Neither may abort the consumption of the document.
+                    logger.warning(
+                        f"Failed to find or create TAG '{raw}' because: {e}",
+                    )
+                    continue
+
+                logger.debug(
+                    f"Found Tag Barcode '{raw}', substituted "
+                    f"to '{tag_name}' and mapped to "
+                    f"tag #{tag.pk}.",
+                )
+                # Several barcodes may map to the same tag
+                if tag.pk not in tags:
+                    tags.append(tag.pk)
+
+        return tags
+
     def get_separation_pages(self) -> dict[int, bool]:
         """
         Search the parsed barcodes for separators and returns a dict of page
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index bc815d4d5..eb33e6241 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -833,6 +833,19 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
 
 CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
 
+CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
+    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
+)
+
+CONSUMER_TAG_BARCODE_MAPPING: Final[dict[str, str]] = dict(
+    json.loads(
+        os.getenv(
+            "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
+            '{"TAG:(.*)": "\\\\g<1>"}',
+        ),
+    ),
+)
+
 CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = __get_boolean(
     "PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
 )