Reduced storage requirements for barcode scanning. Added limit for pages to scan barcodes on.
This commit is contained in:
parent
4903e4290d
commit
89d3eb5bc3
@ -1289,6 +1289,18 @@ combination with PAPERLESS_CONSUMER_BARCODE_UPSCALE bigger than 1.0.
|
|||||||
|
|
||||||
Defaults to "300"
|
Defaults to "300"
|
||||||
|
|
||||||
|
#### [`PAPERLESS_CONSUMER_BARCODE_MAX_PAGES=<int>`](#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES) {#PAPERLESS_CONSUMER_BARCODE_MAX_PAGES}
|
||||||
|
|
||||||
|
: Barcode detection is a computationally intensive operation. This setting
|
||||||
|
limits the recognition of barcodes to a number of first pages. Since many
|
||||||
|
scanners have a limit for the number of pages that can be scanned, it is
|
||||||
|
sensible to set this as the limit here. If a longer document is encountered
|
||||||
|
it was most likely imported directly and not via a scanner, so there should also be no barcodes
|
||||||
|
present. By setting this value to 0 the upper limit is disabled. This is also
|
||||||
|
the default setting.
|
||||||
|
|
||||||
|
Defaults to "0"
|
||||||
|
|
||||||
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
#### [`PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE=<bool>`](#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE) {#PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE}
|
||||||
|
|
||||||
: Enables the detection of barcodes in the scanned document and
|
: Enables the detection of barcodes in the scanned document and
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@ -7,6 +8,7 @@ from typing import Optional
|
|||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from pdf2image import convert_from_path
|
from pdf2image import convert_from_path
|
||||||
|
from pdf2image import pdfinfo_from_path
|
||||||
from pdf2image.exceptions import PDFPageCountError
|
from pdf2image.exceptions import PDFPageCountError
|
||||||
from pikepdf import Page
|
from pikepdf import Page
|
||||||
from pikepdf import Pdf
|
from pikepdf import Pdf
|
||||||
@ -231,13 +233,37 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
|||||||
logger.debug("Scanning for barcodes using ZXING")
|
logger.debug("Scanning for barcodes using ZXING")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pages_from_path = convert_from_path(
|
# Read number of pages from pdf
|
||||||
self.pdf_file,
|
num_of_pages = pdfinfo_from_path(self.pdf_file)["Pages"]
|
||||||
dpi=settings.CONSUMER_BARCODE_DPI,
|
logger.debug(f"PDF has {num_of_pages} pages")
|
||||||
output_folder=self.temp_dir.name,
|
|
||||||
)
|
|
||||||
|
|
||||||
for current_page_number, page in enumerate(pages_from_path):
|
# Get limit from configuration
|
||||||
|
barcode_max_pages = settings.CONSUMER_BARCODE_MAX_PAGES
|
||||||
|
if barcode_max_pages == 0:
|
||||||
|
barcode_max_pages = num_of_pages
|
||||||
|
|
||||||
|
if barcode_max_pages < num_of_pages:
|
||||||
|
logger.debug(f"Reading of barcodes is limited to the first {barcode_max_pages} pages")
|
||||||
|
|
||||||
|
# Loop over all pages (up to the configured limit)
|
||||||
|
for current_page_number in range(min(num_of_pages, barcode_max_pages)):
|
||||||
|
logger.debug(f"Processing page {current_page_number}")
|
||||||
|
|
||||||
|
# Convert page to image
|
||||||
|
logger.debug("Converting page to image")
|
||||||
|
page = convert_from_path(
|
||||||
|
self.pdf_file,
|
||||||
|
dpi=settings.CONSUMER_BARCODE_DPI,
|
||||||
|
output_folder=self.temp_dir.name,
|
||||||
|
first_page=current_page_number + 1,
|
||||||
|
last_page=current_page_number + 1,
|
||||||
|
)[0]
|
||||||
|
|
||||||
|
# Remember filename, since it is lost by upscaling
|
||||||
|
page_filename = page.filename
|
||||||
|
logger.debug(f"Image is at {page_filename}")
|
||||||
|
|
||||||
|
# Upscale image if configured
|
||||||
factor = settings.CONSUMER_BARCODE_UPSCALE
|
factor = settings.CONSUMER_BARCODE_UPSCALE
|
||||||
if factor > 1.0:
|
if factor > 1.0:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
@ -248,11 +274,17 @@ class BarcodePlugin(ConsumeTaskPlugin):
|
|||||||
(int(round(x * factor)), (int(round(y * factor)))),
|
(int(round(x * factor)), (int(round(y * factor)))),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Detect barcodes
|
||||||
|
logger.debug("Detecting barcodes")
|
||||||
for barcode_value in reader(page):
|
for barcode_value in reader(page):
|
||||||
self.barcodes.append(
|
self.barcodes.append(
|
||||||
Barcode(current_page_number, barcode_value),
|
Barcode(current_page_number, barcode_value),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Delete temporary image file
|
||||||
|
logger.debug("Cleaning up temporary image file")
|
||||||
|
os.remove(page_filename)
|
||||||
|
|
||||||
# Password protected files can't be checked
|
# Password protected files can't be checked
|
||||||
# This is the exception raised for those
|
# This is the exception raised for those
|
||||||
except PDFPageCountError as e:
|
except PDFPageCountError as e:
|
||||||
|
@ -925,6 +925,8 @@ CONSUMER_BARCODE_UPSCALE: Final[float] = __get_float(
|
|||||||
|
|
||||||
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
CONSUMER_BARCODE_DPI: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_DPI", 300)
|
||||||
|
|
||||||
|
CONSUMER_BARCODE_MAX_PAGES: Final[int] = __get_int("PAPERLESS_CONSUMER_BARCODE_MAX_PAGES", 0)
|
||||||
|
|
||||||
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = __get_boolean(
|
||||||
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
"PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user