Detect and reset invalid ASNs to 0 during indexing with a loud error to the user
This commit is contained in:
		
							parent
							
								
									a203b006e7
								
							
						
					
					
						commit
						0f536a9b9a
					
				@ -146,11 +146,16 @@ class Consumer(LoggingMixin):
 | 
			
		||||
            return
 | 
			
		||||
        # Validate the range is above zero and less than uint32_t max
 | 
			
		||||
        # otherwise, Whoosh can't handle it in the index
 | 
			
		||||
        if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
 | 
			
		||||
        if (
 | 
			
		||||
            self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
 | 
			
		||||
            or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
 | 
			
		||||
        ):
 | 
			
		||||
            self._fail(
 | 
			
		||||
                MESSAGE_ASN_RANGE,
 | 
			
		||||
                f"Not consuming {self.filename}: "
 | 
			
		||||
                f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
 | 
			
		||||
                f"Given ASN {self.override_asn} is out of range "
 | 
			
		||||
                f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
 | 
			
		||||
                f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
 | 
			
		||||
            )
 | 
			
		||||
        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
 | 
			
		||||
            self._fail(
 | 
			
		||||
 | 
			
		||||
@ -90,10 +90,22 @@ def open_index_searcher():
 | 
			
		||||
        searcher.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def update_document(writer, doc):
 | 
			
		||||
def update_document(writer: AsyncWriter, doc: Document):
 | 
			
		||||
    tags = ",".join([t.name for t in doc.tags.all()])
 | 
			
		||||
    tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
 | 
			
		||||
    comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
 | 
			
		||||
    asn = doc.archive_serial_number
 | 
			
		||||
    if asn is not None and (
 | 
			
		||||
        asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
 | 
			
		||||
        or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
 | 
			
		||||
    ):
 | 
			
		||||
        logger.error(
 | 
			
		||||
            f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
 | 
			
		||||
            f"ASN is out of range "
 | 
			
		||||
            f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
 | 
			
		||||
            f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}.",
 | 
			
		||||
        )
 | 
			
		||||
        asn = 0
 | 
			
		||||
    writer.update_document(
 | 
			
		||||
        id=doc.pk,
 | 
			
		||||
        title=doc.title,
 | 
			
		||||
@ -109,7 +121,7 @@ def update_document(writer, doc):
 | 
			
		||||
        has_type=doc.document_type is not None,
 | 
			
		||||
        created=doc.created,
 | 
			
		||||
        added=doc.added,
 | 
			
		||||
        asn=doc.archive_serial_number,
 | 
			
		||||
        asn=asn,
 | 
			
		||||
        modified=doc.modified,
 | 
			
		||||
        path=doc.storage_path.name if doc.storage_path else None,
 | 
			
		||||
        path_id=doc.storage_path.id if doc.storage_path else None,
 | 
			
		||||
 | 
			
		||||
@ -3,6 +3,7 @@ import logging
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
from collections import OrderedDict
 | 
			
		||||
from typing import Final
 | 
			
		||||
from typing import Optional
 | 
			
		||||
 | 
			
		||||
import dateutil.parser
 | 
			
		||||
@ -229,6 +230,9 @@ class Document(models.Model):
 | 
			
		||||
        help_text=_("The original name of the file when it was uploaded"),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
 | 
			
		||||
    ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
 | 
			
		||||
 | 
			
		||||
    archive_serial_number = models.PositiveIntegerField(
 | 
			
		||||
        _("archive serial number"),
 | 
			
		||||
        blank=True,
 | 
			
		||||
@ -236,8 +240,8 @@ class Document(models.Model):
 | 
			
		||||
        unique=True,
 | 
			
		||||
        db_index=True,
 | 
			
		||||
        validators=[
 | 
			
		||||
            MaxValueValidator(0xFF_FF_FF_FF),
 | 
			
		||||
            MinValueValidator(0),
 | 
			
		||||
            MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
 | 
			
		||||
            MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
 | 
			
		||||
        ],
 | 
			
		||||
        help_text=_(
 | 
			
		||||
            "The position of this document in your physical document " "archive.",
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,5 @@
 | 
			
		||||
from unittest import mock
 | 
			
		||||
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
from documents import index
 | 
			
		||||
from documents.models import Document
 | 
			
		||||
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
 | 
			
		||||
        )
 | 
			
		||||
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
 | 
			
		||||
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
 | 
			
		||||
 | 
			
		||||
    def test_archive_serial_number_ranging(self):
        """
        GIVEN:
            - Document with an archive serial number above schema allowed size
        WHEN:
            - Document is provided to the index
        THEN:
            - Error is logged
            - Document ASN is reset to 0 for the index
        """
        invalid_asn = Document.ARCHIVE_SERIAL_NUMBER_MAX + 1
        doc1 = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
            # yes, this is allowed, unless full_clean is run
            # DRF does call the validators, this test won't
            archive_serial_number=invalid_asn,
        )

        with self.assertLogs("paperless.index", level="ERROR") as cm, mock.patch(
            "documents.index.AsyncWriter.update_document",
        ) as mocked_update_doc:
            index.add_or_update_document(doc1)

        mocked_update_doc.assert_called_once()
        _, kwargs = mocked_update_doc.call_args

        # The out of range value must never reach the index writer
        self.assertEqual(kwargs["asn"], 0)

        # Build the expectation from the actual document instead of assuming
        # its primary key is 1: database sequences are not guaranteed to be
        # reset between tests on every backend.
        expected_str = (
            "ERROR:paperless.index:Not indexing Archive Serial Number "
            f"{invalid_asn} of document {doc1.pk}"
        )
        self.assertIn(expected_str, cm.output[0])
 | 
			
		||||
 | 
			
		||||
    def test_archive_serial_number_is_none(self):
        """
        GIVEN:
            - Document created without any archive serial number
        WHEN:
            - Document is handed to the index for adding/updating
        THEN:
            - The writer receives the ASN unchanged, as None
        """
        document = Document.objects.create(
            title="doc1",
            checksum="A",
            content="test test2 test3",
        )

        writer_patch = mock.patch("documents.index.AsyncWriter.update_document")
        with writer_patch as update_spy:
            index.add_or_update_document(document)

        # Exactly one write, and the missing ASN is passed through as-is
        update_spy.assert_called_once()
        passed_kwargs = update_spy.call_args[1]
        self.assertIsNone(passed_kwargs["asn"])
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user