Reduced storage requirements for barcode scanning. Added limit for pages to scan barcodes on.

This commit is contained in:
Lukas Metzger 2024-09-07 12:28:07 +00:00
parent 4903e4290d
commit 89d3eb5bc3
3 changed files with 52 additions and 6 deletions

View File

@ -1289,6 +1289,18 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
Defaults to "300" Defaults to "300"
#### [`PAPERLESS_CONSUMER_BARCODE_MAX_PAGES=<int>`](#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES) {#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES}
: Barcode detection is a computationally intensive operation. This setting
limits the recognition of barcodes to a number of first pages. Since many
scanners have a limit for the number of pages that can be scanned, it is
sensible to set this as the limit here. If a longer document is encountered,
it was presumably imported directly and not via a scanner, so there should also
be no barcodes present. By setting this value to 0 the upper limit is disabled. This is also
the default setting.
Defaults to "0"
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE} #### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
: Enables the detection of barcodes in the scanned document and : Enables the detection of barcodes in the scanned document and

View File

@ -1,4 +1,5 @@
import logging import logging
import os
import re import re
import tempfile import tempfile
from dataclasses import dataclass from dataclasses import dataclass
@ -7,6 +8,7 @@ from typing import Optional
from django.conf import settings from django.conf import settings
from pdf2image import convert_from_path from pdf2image import convert_from_path
from pdf2image import pdfinfo_from_path
from pdf2image.exceptions import PDFPageCountError from pdf2image.exceptions import PDFPageCountError
from pikepdf import Page from pikepdf import Page
from pikepdf import Pdf from pikepdf import Pdf
@ -231,13 +233,37 @@ class BarcodePlugin(ConsumeTaskPlugin):
logger.debug("Scanning for barcodes using ZXING") logger.debug("Scanning for barcodes using ZXING")
try: try:
pages_from_path = convert_from_path( # Read number of pages from pdf
num_of_pages = pdfinfo_from_path(self.pdf_file)["Pages"]
logger.debug(f"PDF has {num_of_pages} pages")
# Get limit from configuration
barcode_max_pages = settings.CONSUMER_BARCODE_MAX_PAGES
if barcode_max_pages == 0:
barcode_max_pages = num_of_pages
if barcode_max_pages < num_of_pages:
logger.debug(f"Reading of barcodes is limited to the first {barcode_max_pages} pages")
# Loop over all pages
for current_page_number in range(min(num_of_pages, barcode_max_pages)):
logger.debug(f"Processing page {current_page_number}")
# Convert page to image
logger.debug("Converting page to image")
page = convert_from_path(
self.pdf_file, self.pdf_file,
dpi=settings.CONSUMER_BARCODE_DPI, dpi=settings.CONSUMER_BARCODE_DPI,
output_folder=self.temp_dir.name, output_folder=self.temp_dir.name,
) first_page=current_page_number + 1,
last_page=current_page_number + 1,
)[0]
for current_page_number, page in enumerate(pages_from_path): # Remember filename, since it is lost by upscaling
page_filename = page.filename
logger.debug(f"Image is at {page_filename}")
# Upscale image if configured
factor = settings.CONSUMER_BARCODE_UPSCALE factor = settings.CONSUMER_BARCODE_UPSCALE
if factor > 1.0: if factor > 1.0:
logger.debug( logger.debug(
@ -248,11 +274,17 @@ class BarcodePlugin(ConsumeTaskPlugin):
(int(round(x * factor)), (int(round(y * factor)))), (int(round(x * factor)), (int(round(y * factor)))),
) )
# Detect barcodes
logger.debug("Detecting barcodes")
for barcode_value in reader(page): for barcode_value in reader(page):
self.barcodes.append( self.barcodes.append(
Barcode(current_page_number, barcode_value), Barcode(current_page_number, barcode_value),
) )
# Delete temporary image file
logger.debug("Cleaning up temporary image file")
os.remove(page_filename)
# Password protected files can't be checked # Password protected files can't be checked
# This is the exception raised for those # This is the exception raised for those
except PDFPageCountError as e: except PDFPageCountError as e:

View File

@ -925,6 +925,8 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300) CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_MAX_PAGES", 0)
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean( CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE", "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
) )