Initial conversion of the smaller tests into pytest format
This commit is contained in:
parent
1b9cf5121b
commit
fb4d1f2b53
@ -1,4 +1,4 @@
|
|||||||
import os
|
from pathlib import Path
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@ -15,7 +15,7 @@ class TextDocumentParser(DocumentParser):
|
|||||||
|
|
||||||
logging_name = "paperless.parsing.text"
|
logging_name = "paperless.parsing.text"
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_thumbnail(self, document_path: Path, mime_type, file_name=None) -> Path:
|
||||||
text = self.read_file_handle_unicode_errors(document_path)
|
text = self.read_file_handle_unicode_errors(document_path)
|
||||||
|
|
||||||
img = Image.new("RGB", (500, 700), color="white")
|
img = Image.new("RGB", (500, 700), color="white")
|
||||||
@ -27,7 +27,7 @@ class TextDocumentParser(DocumentParser):
|
|||||||
)
|
)
|
||||||
draw.text((5, 5), text, font=font, fill="black")
|
draw.text((5, 5), text, font=font, fill="black")
|
||||||
|
|
||||||
out_path = os.path.join(self.tempdir, "thumb.webp")
|
out_path = self.tempdir / "thumb.webp"
|
||||||
img.save(out_path, format="WEBP")
|
img.save(out_path, format="WEBP")
|
||||||
|
|
||||||
return out_path
|
return out_path
|
||||||
|
30
src/paperless_text/tests/conftest.py
Normal file
30
src/paperless_text/tests/conftest.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
from collections.abc import Generator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless_text.parsers import TextDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_dir() -> Path:
    """
    Absolute, resolved path of the ``samples`` directory next to this file.
    """
    here = Path(__file__).parent
    return (here / "samples").resolve()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def text_parser() -> Generator[TextDocumentParser, None, None]:
    """
    Yield a fresh ``TextDocumentParser`` and call its ``cleanup()`` afterwards.

    The parser is built before entering the ``try`` block: if the constructor
    raised inside it, ``finally`` would reference an unbound ``parser`` and the
    resulting ``UnboundLocalError`` would hide the real failure.
    """
    parser = TextDocumentParser(logging_group=None)
    try:
        yield parser
    finally:
        # Executed on both passing and failing tests
        parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_txt_file(sample_dir: Path) -> Path:
    """
    Path of ``test.txt`` inside the sample directory.
    """
    return sample_dir.joinpath("test.txt")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def malformed_txt_file(sample_dir: Path) -> Path:
    """
    Path of ``decode_error.txt`` inside the sample directory.
    """
    return sample_dir.joinpath("decode_error.txt")
|
@ -1,37 +1,26 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
from documents.tests.utils import DirectoriesMixin
|
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
|
||||||
from paperless_text.parsers import TextDocumentParser
|
from paperless_text.parsers import TextDocumentParser
|
||||||
|
|
||||||
|
|
||||||
class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestTextParser:
|
||||||
SAMPLE_DIR = Path(__file__).resolve().parent / "samples"
|
def test_thumbnail(self, text_parser: TextDocumentParser, sample_txt_file: Path):
|
||||||
|
|
||||||
def test_thumbnail(self):
|
|
||||||
parser = TextDocumentParser(None)
|
|
||||||
|
|
||||||
# just make sure that it does not crash
|
# just make sure that it does not crash
|
||||||
f = parser.get_thumbnail(
|
f = text_parser.get_thumbnail(sample_txt_file, "text/plain")
|
||||||
self.SAMPLE_DIR / "test.txt",
|
assert f.exists()
|
||||||
"text/plain",
|
assert f.is_file()
|
||||||
)
|
|
||||||
self.assertIsFile(f)
|
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self, text_parser: TextDocumentParser, sample_txt_file: Path):
|
||||||
parser = TextDocumentParser(None)
|
text_parser.parse(sample_txt_file, "text/plain")
|
||||||
|
|
||||||
parser.parse(
|
assert text_parser.get_text() == "This is a test file.\n"
|
||||||
self.SAMPLE_DIR / "test.txt",
|
assert text_parser.get_archive_path() is None
|
||||||
"text/plain",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(parser.get_text(), "This is a test file.\n")
|
def test_parse_invalid_bytes(
|
||||||
self.assertIsNone(parser.get_archive_path())
|
self,
|
||||||
|
text_parser: TextDocumentParser,
|
||||||
def test_parse_invalid_bytes(self):
|
malformed_txt_file: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Text file which contains invalid UTF bytes
|
- Text file which contains invalid UTF bytes
|
||||||
@ -41,12 +30,8 @@ class TestTextParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
- Parsing continues
|
- Parsing continues
|
||||||
- Invalid bytes are removed
|
- Invalid bytes are removed
|
||||||
"""
|
"""
|
||||||
parser = TextDocumentParser(None)
|
|
||||||
|
|
||||||
parser.parse(
|
text_parser.parse(malformed_txt_file, "text/plain")
|
||||||
self.SAMPLE_DIR / "decode_error.txt",
|
|
||||||
"text/plain",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.assertEqual(parser.get_text(), "Pantothens<EFBFBD>ure\n")
|
assert text_parser.get_text() == "Pantothens<EFBFBD>ure\n"
|
||||||
self.assertIsNone(parser.get_archive_path())
|
assert text_parser.get_archive_path() is None
|
||||||
|
40
src/paperless_tika/tests/conftest.py
Normal file
40
src/paperless_tika/tests/conftest.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
from collections.abc import Generator
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
def tika_parser() -> Generator[TikaDocumentParser, None, None]:
    """
    Yield a fresh ``TikaDocumentParser`` and call its ``cleanup()`` afterwards.

    The parser is built before entering the ``try`` block: if the constructor
    raised inside it, ``finally`` would reference an unbound ``parser`` and the
    resulting ``UnboundLocalError`` would hide the real failure.
    """
    parser = TikaDocumentParser(logging_group=None)
    try:
        yield parser
    finally:
        # Executed on both passing and failing tests
        parser.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_dir() -> Path:
    """
    Absolute, resolved path of the ``samples`` directory next to this file.
    """
    here = Path(__file__).parent
    return (here / "samples").resolve()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_odt_file(sample_dir: Path) -> Path:
    """
    Path of ``sample.odt`` inside the sample directory.
    """
    return sample_dir.joinpath("sample.odt")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_docx_file(sample_dir: Path) -> Path:
    """
    Path of ``sample.docx`` inside the sample directory.
    """
    return sample_dir.joinpath("sample.docx")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_doc_file(sample_dir: Path) -> Path:
    """
    Path of ``sample.doc`` inside the sample directory.
    """
    return sample_dir.joinpath("sample.doc")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def sample_broken_odt(sample_dir: Path) -> Path:
    """
    Path of ``multi-part-broken.odt`` inside the sample directory.
    """
    return sample_dir.joinpath("multi-part-broken.odt")
|
@ -1,9 +1,7 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Final
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from django.test import TestCase
|
|
||||||
|
|
||||||
from documents.tests.utils import util_call_with_backoff
|
from documents.tests.utils import util_call_with_backoff
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
@ -13,22 +11,19 @@ from paperless_tika.parsers import TikaDocumentParser
|
|||||||
"PAPERLESS_CI_TEST" not in os.environ,
|
"PAPERLESS_CI_TEST" not in os.environ,
|
||||||
reason="No Gotenberg/Tika servers to test with",
|
reason="No Gotenberg/Tika servers to test with",
|
||||||
)
|
)
|
||||||
class TestTikaParserAgainstServer(TestCase):
|
@pytest.mark.django_db()
|
||||||
|
class TestTikaParserAgainstServer:
|
||||||
"""
|
"""
|
||||||
This test case tests the Tika parsing against a live tika server,
|
This test case tests the Tika parsing against a live tika server,
|
||||||
if the environment contains the correct value indicating such a server
|
if the environment contains the correct value indicating such a server
|
||||||
is available.
|
is available.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SAMPLE_DIR: Final[Path] = (Path(__file__).parent / Path("samples")).resolve()
|
def test_basic_parse_odt(
|
||||||
|
self,
|
||||||
def setUp(self) -> None:
|
tika_parser: TikaDocumentParser,
|
||||||
self.parser = TikaDocumentParser(logging_group=None)
|
sample_odt_file: Path,
|
||||||
|
):
|
||||||
def tearDown(self) -> None:
|
|
||||||
self.parser.cleanup()
|
|
||||||
|
|
||||||
def test_basic_parse_odt(self):
|
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- An input ODT format document
|
- An input ODT format document
|
||||||
@ -38,26 +33,26 @@ class TestTikaParserAgainstServer(TestCase):
|
|||||||
- Document content is correct
|
- Document content is correct
|
||||||
- Document date is correct
|
- Document date is correct
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / Path("sample.odt")
|
|
||||||
|
|
||||||
util_call_with_backoff(
|
util_call_with_backoff(
|
||||||
self.parser.parse,
|
tika_parser.parse,
|
||||||
[test_file, "application/vnd.oasis.opendocument.text"],
|
[sample_odt_file, "application/vnd.oasis.opendocument.text"],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
assert (
|
||||||
self.parser.text,
|
tika_parser.text
|
||||||
"This is an ODT test document, created September 14, 2022",
|
== "This is an ODT test document, created September 14, 2022"
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(self.parser.archive_path)
|
assert tika_parser.archive_path is not None
|
||||||
with open(self.parser.archive_path, "rb") as f:
|
assert b"PDF-" in tika_parser.archive_path.read_bytes()[:10]
|
||||||
# PDFs begin with the bytes PDF-x.y
|
|
||||||
self.assertTrue(b"PDF-" in f.read()[:10])
|
|
||||||
|
|
||||||
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
# TODO: Unsure what can set the Creation-Date field in a document, enable when possible
|
||||||
# self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_docx(self):
|
def test_basic_parse_docx(
|
||||||
|
self,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_docx_file: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- An input DOCX format document
|
- An input DOCX format document
|
||||||
@ -67,27 +62,29 @@ class TestTikaParserAgainstServer(TestCase):
|
|||||||
- Document content is correct
|
- Document content is correct
|
||||||
- Document date is correct
|
- Document date is correct
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / Path("sample.docx")
|
|
||||||
|
|
||||||
util_call_with_backoff(
|
util_call_with_backoff(
|
||||||
self.parser.parse,
|
tika_parser.parse,
|
||||||
[
|
[
|
||||||
test_file,
|
sample_docx_file,
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
assert (
|
||||||
self.parser.text,
|
tika_parser.text
|
||||||
"This is an DOCX test document, also made September 14, 2022",
|
== "This is an DOCX test document, also made September 14, 2022"
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(self.parser.archive_path)
|
assert tika_parser.archive_path is not None
|
||||||
with open(self.parser.archive_path, "rb") as f:
|
with open(tika_parser.archive_path, "rb") as f:
|
||||||
self.assertTrue(b"PDF-" in f.read()[:10])
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
# self.assertEqual(self.parser.date, datetime.datetime(2022, 9, 14))
|
# self.assertEqual(tika_parser.date, datetime.datetime(2022, 9, 14))
|
||||||
|
|
||||||
def test_basic_parse_doc(self):
|
def test_basic_parse_doc(
|
||||||
|
self,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_doc_file: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- An input DOC format document
|
- An input DOC format document
|
||||||
@ -97,22 +94,24 @@ class TestTikaParserAgainstServer(TestCase):
|
|||||||
- Document content is correct
|
- Document content is correct
|
||||||
- Document date is correct
|
- Document date is correct
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "sample.doc"
|
|
||||||
|
|
||||||
util_call_with_backoff(
|
util_call_with_backoff(
|
||||||
self.parser.parse,
|
tika_parser.parse,
|
||||||
[test_file, "application/msword"],
|
[sample_doc_file, "application/msword"],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn(
|
assert (
|
||||||
"his is a test document, saved in the older .doc format",
|
"This is a test document, saved in the older .doc format"
|
||||||
self.parser.text,
|
in tika_parser.text
|
||||||
)
|
)
|
||||||
self.assertIsNotNone(self.parser.archive_path)
|
assert tika_parser.archive_path is not None
|
||||||
with open(self.parser.archive_path, "rb") as f:
|
with open(tika_parser.archive_path, "rb") as f:
|
||||||
self.assertTrue(b"PDF-" in f.read()[:10])
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
|
||||||
def test_tika_fails_multi_part(self):
|
def test_tika_fails_multi_part(
|
||||||
|
self,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_broken_odt: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- An input ODT format document
|
- An input ODT format document
|
||||||
@ -125,13 +124,11 @@ class TestTikaParserAgainstServer(TestCase):
|
|||||||
See also:
|
See also:
|
||||||
- https://issues.apache.org/jira/browse/TIKA-4110
|
- https://issues.apache.org/jira/browse/TIKA-4110
|
||||||
"""
|
"""
|
||||||
test_file = self.SAMPLE_DIR / "multi-part-broken.odt"
|
|
||||||
|
|
||||||
util_call_with_backoff(
|
util_call_with_backoff(
|
||||||
self.parser.parse,
|
tika_parser.parse,
|
||||||
[test_file, "application/vnd.oasis.opendocument.text"],
|
[sample_broken_odt, "application/vnd.oasis.opendocument.text"],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIsNotNone(self.parser.archive_path)
|
assert tika_parser.archive_path is not None
|
||||||
with open(self.parser.archive_path, "rb") as f:
|
with open(tika_parser.archive_path, "rb") as f:
|
||||||
self.assertTrue(b"PDF-" in f.read()[:10])
|
assert b"PDF-" in f.read()[:10]
|
||||||
|
@ -1,30 +1,30 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import os
|
|
||||||
import zoneinfo
|
import zoneinfo
|
||||||
|
from http import HTTPStatus
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from django.test import TestCase
|
import pytest
|
||||||
from django.test import override_settings
|
|
||||||
from httpx import codes
|
from httpx import codes
|
||||||
from httpx._multipart import DataField
|
from httpx._multipart import DataField
|
||||||
from rest_framework import status
|
from pytest_django.fixtures import SettingsWrapper
|
||||||
|
from pytest_httpx import HTTPXMock
|
||||||
|
|
||||||
from documents.parsers import ParseError
|
from documents.parsers import ParseError
|
||||||
from paperless_tika.parsers import TikaDocumentParser
|
from paperless_tika.parsers import TikaDocumentParser
|
||||||
from paperless_tika.tests.utils import HttpxMockMixin
|
|
||||||
|
|
||||||
|
|
||||||
class TestTikaParser(HttpxMockMixin, TestCase):
|
@pytest.mark.django_db()
|
||||||
def setUp(self) -> None:
|
class TestTikaParser:
|
||||||
self.parser = TikaDocumentParser(logging_group=None)
|
def test_parse(
|
||||||
|
self,
|
||||||
def tearDown(self) -> None:
|
httpx_mock: HTTPXMock,
|
||||||
self.parser.cleanup()
|
settings: SettingsWrapper,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
@override_settings(TIME_ZONE="America/Chicago")
|
sample_odt_file: Path,
|
||||||
def test_parse(self):
|
):
|
||||||
|
settings.TIME_ZONE = "America/Chicago"
|
||||||
# Pretend parse response
|
# Pretend parse response
|
||||||
self.httpx_mock.add_response(
|
httpx_mock.add_response(
|
||||||
json={
|
json={
|
||||||
"Content-Type": "application/vnd.oasis.opendocument.text",
|
"Content-Type": "application/vnd.oasis.opendocument.text",
|
||||||
"X-TIKA:Parsed-By": [],
|
"X-TIKA:Parsed-By": [],
|
||||||
@ -33,30 +33,29 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
# Pretend convert to PDF response
|
# Pretend convert to PDF response
|
||||||
self.httpx_mock.add_response(content=b"PDF document")
|
httpx_mock.add_response(content=b"PDF document")
|
||||||
|
|
||||||
file = Path(os.path.join(self.parser.tempdir, "input.odt"))
|
tika_parser.parse(sample_odt_file, "application/vnd.oasis.opendocument.text")
|
||||||
file.touch()
|
|
||||||
|
|
||||||
self.parser.parse(file, "application/vnd.oasis.opendocument.text")
|
assert tika_parser.text == "the content"
|
||||||
|
assert tika_parser.archive_path is not None
|
||||||
|
with open(tika_parser.archive_path, "rb") as f:
|
||||||
|
assert f.read() == b"PDF document"
|
||||||
|
|
||||||
self.assertEqual(self.parser.text, "the content")
|
assert tika_parser.date == datetime.datetime(
|
||||||
self.assertIsNotNone(self.parser.archive_path)
|
2020,
|
||||||
with open(self.parser.archive_path, "rb") as f:
|
11,
|
||||||
self.assertEqual(f.read(), b"PDF document")
|
21,
|
||||||
|
tzinfo=zoneinfo.ZoneInfo("America/Chicago"),
|
||||||
self.assertEqual(
|
|
||||||
self.parser.date,
|
|
||||||
datetime.datetime(
|
|
||||||
2020,
|
|
||||||
11,
|
|
||||||
21,
|
|
||||||
tzinfo=zoneinfo.ZoneInfo("America/Chicago"),
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_metadata(self):
|
def test_metadata(
|
||||||
self.httpx_mock.add_response(
|
self,
|
||||||
|
httpx_mock: HTTPXMock,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_odt_file: Path,
|
||||||
|
):
|
||||||
|
httpx_mock.add_response(
|
||||||
json={
|
json={
|
||||||
"Content-Type": "application/vnd.oasis.opendocument.text",
|
"Content-Type": "application/vnd.oasis.opendocument.text",
|
||||||
"X-TIKA:Parsed-By": [],
|
"X-TIKA:Parsed-By": [],
|
||||||
@ -65,18 +64,20 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
file = Path(os.path.join(self.parser.tempdir, "input.odt"))
|
metadata = tika_parser.extract_metadata(
|
||||||
file.touch()
|
sample_odt_file,
|
||||||
|
|
||||||
metadata = self.parser.extract_metadata(
|
|
||||||
file,
|
|
||||||
"application/vnd.oasis.opendocument.text",
|
"application/vnd.oasis.opendocument.text",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertTrue("dcterms:created" in [m["key"] for m in metadata])
|
assert "dcterms:created" in [m["key"] for m in metadata]
|
||||||
self.assertTrue("Some-key" in [m["key"] for m in metadata])
|
assert "Some-key" in [m["key"] for m in metadata]
|
||||||
|
|
||||||
def test_convert_failure(self):
|
def test_convert_failure(
|
||||||
|
self,
|
||||||
|
httpx_mock: HTTPXMock,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_odt_file: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Document needs to be converted to PDF
|
- Document needs to be converted to PDF
|
||||||
@ -86,15 +87,29 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
|||||||
- Parse error is raised
|
- Parse error is raised
|
||||||
"""
|
"""
|
||||||
# Pretend convert to PDF response
|
# Pretend convert to PDF response
|
||||||
self.httpx_mock.add_response(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR)
|
httpx_mock.add_response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
|
||||||
|
|
||||||
file = Path(os.path.join(self.parser.tempdir, "input.odt"))
|
with pytest.raises(ParseError):
|
||||||
file.touch()
|
tika_parser.convert_to_pdf(sample_odt_file, None)
|
||||||
|
|
||||||
with self.assertRaises(ParseError):
|
@pytest.mark.parametrize(
|
||||||
self.parser.convert_to_pdf(file, None)
|
("setting_value", "expected_form_value"),
|
||||||
|
[
|
||||||
def test_request_pdf_a_format(self):
|
("pdfa", "PDF/A-2b"),
|
||||||
|
("pdfa-2", "PDF/A-2b"),
|
||||||
|
("pdfa-1", "PDF/A-1a"),
|
||||||
|
("pdfa-3", "PDF/A-3b"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_request_pdf_a_format(
|
||||||
|
self,
|
||||||
|
setting_value: str,
|
||||||
|
expected_form_value: str,
|
||||||
|
httpx_mock: HTTPXMock,
|
||||||
|
settings: SettingsWrapper,
|
||||||
|
tika_parser: TikaDocumentParser,
|
||||||
|
sample_odt_file: Path,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
GIVEN:
|
GIVEN:
|
||||||
- Document needs to be converted to PDF
|
- Document needs to be converted to PDF
|
||||||
@ -103,31 +118,21 @@ class TestTikaParser(HttpxMockMixin, TestCase):
|
|||||||
THEN:
|
THEN:
|
||||||
- Request to Gotenberg contains the expected PDF/A format string
|
- Request to Gotenberg contains the expected PDF/A format string
|
||||||
"""
|
"""
|
||||||
file = Path(os.path.join(self.parser.tempdir, "input.odt"))
|
settings.OCR_OUTPUT_TYPE = setting_value
|
||||||
file.touch()
|
httpx_mock.add_response(
|
||||||
|
status_code=codes.OK,
|
||||||
|
content=b"PDF document",
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
|
||||||
for setting, expected_key in [
|
tika_parser.convert_to_pdf(sample_odt_file, None)
|
||||||
("pdfa", "PDF/A-2b"),
|
|
||||||
("pdfa-2", "PDF/A-2b"),
|
|
||||||
("pdfa-1", "PDF/A-2b"),
|
|
||||||
("pdfa-3", "PDF/A-3b"),
|
|
||||||
]:
|
|
||||||
with override_settings(OCR_OUTPUT_TYPE=setting):
|
|
||||||
self.httpx_mock.add_response(
|
|
||||||
status_code=codes.OK,
|
|
||||||
content=b"PDF document",
|
|
||||||
method="POST",
|
|
||||||
)
|
|
||||||
|
|
||||||
self.parser.convert_to_pdf(file, None)
|
request = httpx_mock.get_request()
|
||||||
|
found = False
|
||||||
|
for field in request.stream.fields:
|
||||||
|
if isinstance(field, DataField) and field.name == "pdfa":
|
||||||
|
assert field.value == expected_form_value
|
||||||
|
found = True
|
||||||
|
assert found, "pdfFormat was not found"
|
||||||
|
|
||||||
request = self.httpx_mock.get_request()
|
httpx_mock.reset(assert_all_responses_were_requested=False)
|
||||||
found = False
|
|
||||||
for field in request.stream.fields:
|
|
||||||
if isinstance(field, DataField) and field.name == "pdfa":
|
|
||||||
self.assertEqual(field.value, expected_key)
|
|
||||||
found = True
|
|
||||||
break
|
|
||||||
self.assertTrue(found)
|
|
||||||
|
|
||||||
self.httpx_mock.reset(assert_all_responses_were_requested=False)
|
|
||||||
|
@ -2,9 +2,10 @@ import pytest
|
|||||||
from pytest_httpx import HTTPXMock
|
from pytest_httpx import HTTPXMock
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Remove this class once paperless_mail is updated as well
|
||||||
class HttpxMockMixin:
|
class HttpxMockMixin:
|
||||||
@pytest.fixture(autouse=True)
|
@pytest.fixture(autouse=True)
|
||||||
def httpx_mock_auto(self, httpx_mock: HTTPXMock):
|
def _httpx_mock_auto(self, httpx_mock: HTTPXMock):
|
||||||
"""
|
"""
|
||||||
Workaround for allowing use of a fixture with unittest style testing
|
Workaround for allowing use of a fixture with unittest style testing
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user