Differentiate generic vs parse errors during consumption

This commit is contained in:
shamoon 2023-08-31 14:19:51 -07:00
parent cb62afb053
commit 87f6f12fae
3 changed files with 67 additions and 5 deletions

View File

@ -34,6 +34,7 @@ from .models import DocumentType
from .models import FileInfo from .models import FileInfo
from .models import Tag from .models import Tag
from .parsers import DocumentParser from .parsers import DocumentParser
from .parsers import ParseError
from .parsers import get_parser_class_for_mime_type from .parsers import get_parser_class_for_mime_type
from .parsers import parse_date from .parsers import parse_date
from .signals import document_consumption_finished from .signals import document_consumption_finished
@ -448,12 +449,19 @@ class Consumer(LoggingMixin):
date = parse_date(self.filename, text) date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path() archive_path = document_parser.get_archive_path()
except ParseError as e:
self._fail(
str(e),
f"Error occurred while consuming document {self.filename}: {e}",
exc_info=True,
exception=e,
)
except Exception as e: except Exception as e:
document_parser.cleanup() document_parser.cleanup()
tempdir.cleanup() tempdir.cleanup()
self._fail( self._fail(
str(e), str(e),
f"Error while consuming document {self.filename}: {e}", f"Unexpected error while consuming document {self.filename}: {e}",
exc_info=True, exc_info=True,
exception=e, exception=e,
) )
@ -543,8 +551,8 @@ class Consumer(LoggingMixin):
except Exception as e: except Exception as e:
self._fail( self._fail(
str(e), str(e),
f"The following error occurred while consuming " f"The following error occurred while storing document "
f"{self.filename}: {e}", f"{self.filename} after consuming: {e}",
exc_info=True, exc_info=True,
exception=e, exception=e,
) )

View File

@ -211,6 +211,18 @@ class FaultyParser(DocumentParser):
raise ParseError("Does not compute.") raise ParseError("Does not compute.")
class FaultyGenericExceptionParser(DocumentParser):
    """Test parser whose ``parse`` always raises a plain ``Exception``.

    It creates an empty ``.webp`` placeholder file in ``scratch_dir`` and
    returns that path from ``get_thumbnail``.
    """

    def __init__(self, logging_group, scratch_dir):
        """Initialise the base parser and create the placeholder thumbnail.

        Args:
            logging_group: Passed through unchanged to ``DocumentParser``.
            scratch_dir: Directory in which the placeholder file is created.
        """
        super().__init__(logging_group)
        # mkstemp hands back an already-open OS-level descriptor together
        # with the path. Only the path is needed here, so close the
        # descriptor right away instead of leaking one per parser instance.
        fd, self.fake_thumb = tempfile.mkstemp(suffix=".webp", dir=scratch_dir)
        os.close(fd)

    def get_thumbnail(self, document_path, mime_type, file_name=None):
        """Return the path of the placeholder file created in ``__init__``."""
        return self.fake_thumb

    def parse(self, document_path, mime_type, file_name=None):
        """Always fail with a generic exception.

        Raises:
            Exception: Unconditionally, with the message "Generic exception."
        """
        raise Exception("Generic exception.")
def fake_magic_from_file(file, mime=False): def fake_magic_from_file(file, mime=False):
if mime: if mime:
if os.path.splitext(file)[1] == ".pdf": if os.path.splitext(file)[1] == ".pdf":
@ -260,6 +272,13 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
def make_faulty_parser(self, logging_group, progress_callback=None): def make_faulty_parser(self, logging_group, progress_callback=None):
return FaultyParser(logging_group, self.dirs.scratch_dir) return FaultyParser(logging_group, self.dirs.scratch_dir)
def make_faulty_generic_exception_parser(
    self,
    logging_group,
    progress_callback=None,
):
    """Build a ``FaultyGenericExceptionParser`` using this test's scratch dir.

    ``progress_callback`` is accepted but not used by this factory.
    """
    scratch_dir = self.dirs.scratch_dir
    parser = FaultyGenericExceptionParser(logging_group, scratch_dir)
    return parser
def setUp(self): def setUp(self):
super().setUp() super().setUp()
@ -496,7 +515,29 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaisesMessage( self.assertRaisesMessage(
ConsumerError, ConsumerError,
"sample.pdf: Error while consuming document sample.pdf: Does not compute.", "sample.pdf: Error occurred while consuming document sample.pdf: Does not compute.",
self.consumer.try_consume_file,
self.get_test_file(),
)
self._assert_first_last_send_progress(last_status="FAILED")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def testGenericParserException(self, m):
m.return_value = [
(
None,
{
"parser": self.make_faulty_generic_exception_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0,
},
),
]
self.assertRaisesMessage(
ConsumerError,
"sample.pdf: Unexpected error while consuming document sample.pdf: Generic exception.",
self.consumer.try_consume_file, self.consumer.try_consume_file,
self.get_test_file(), self.get_test_file(),
) )
@ -510,7 +551,7 @@ class TestConsumer(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertRaisesMessage( self.assertRaisesMessage(
ConsumerError, ConsumerError,
"sample.pdf: The following error occurred while consuming sample.pdf: NO.", "sample.pdf: The following error occurred while storing document sample.pdf after consuming: NO.",
self.consumer.try_consume_file, self.consumer.try_consume_file,
filename, filename,
) )

View File

@ -8,6 +8,7 @@ from unittest import mock
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
from ocrmypdf import SubprocessOutputError
from documents.parsers import ParseError from documents.parsers import ParseError
from documents.parsers import run_convert from documents.parsers import run_convert
@ -827,6 +828,18 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
# Copied from the PDF to here. Don't even look at it # Copied from the PDF to here. Don't even look at it
self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text()) self.assertIn("ةﯾﻠﺧﺎدﻻ ةرازو", parser.get_text())
@mock.patch("ocrmypdf.ocr")
def test_gs_rendering_error(self, m):
    """
    GIVEN:
        - ``ocrmypdf.ocr`` is patched to raise ``SubprocessOutputError``
    WHEN:
        - The sample PDF is parsed
    THEN:
        - ``parse`` raises ``ParseError``
    """
    m.side_effect = SubprocessOutputError("Ghostscript PDF/A rendering failed")
    parser = RasterisedDocumentParser(None)
    sample_pdf = os.path.join(self.SAMPLE_FILES, "simple-digital.pdf")

    with self.assertRaises(ParseError):
        parser.parse(sample_pdf, "application/pdf")
class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase): class TestParserFileTypes(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples") SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")