diff --git a/src/documents/tests/test_api.py b/src/documents/tests/test_api.py index 618494c98..b9942c099 100644 --- a/src/documents/tests/test_api.py +++ b/src/documents/tests/test_api.py @@ -1,12 +1,10 @@ import datetime -import io import json import os import shutil import tempfile import urllib.request import uuid -import zipfile import zoneinfo from datetime import timedelta from pathlib import Path @@ -2057,326 +2055,6 @@ class TestApiUiSettings(DirectoriesMixin, APITestCase): ) -class TestBulkDownload(DirectoriesMixin, APITestCase): - ENDPOINT = "/api/documents/bulk_download/" - - def setUp(self): - super().setUp() - - user = User.objects.create_superuser(username="temp_admin") - self.client.force_authenticate(user=user) - - self.doc1 = Document.objects.create(title="unrelated", checksum="A") - self.doc2 = Document.objects.create( - title="document A", - filename="docA.pdf", - mime_type="application/pdf", - checksum="B", - created=timezone.make_aware(datetime.datetime(2021, 1, 1)), - ) - self.doc2b = Document.objects.create( - title="document A", - filename="docA2.pdf", - mime_type="application/pdf", - checksum="D", - created=timezone.make_aware(datetime.datetime(2021, 1, 1)), - ) - self.doc3 = Document.objects.create( - title="document B", - filename="docB.jpg", - mime_type="image/jpeg", - checksum="C", - created=timezone.make_aware(datetime.datetime(2020, 3, 21)), - archive_filename="docB.pdf", - archive_checksum="D", - ) - - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), - self.doc2.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.png"), - self.doc2b.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"), - self.doc3.source_path, - ) - shutil.copy( - os.path.join(os.path.dirname(__file__), "samples", "test_with_bom.pdf"), - self.doc3.archive_path, - ) - - def test_download_originals(self): - response = self.client.post( - self.ENDPOINT, - json.dumps( - {"documents": [self.doc2.id, self.doc3.id], "content": "originals"}, - ), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 2) - self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) - self.assertIn("2020-03-21 document B.jpg", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) - - with self.doc3.source_file as f: - self.assertEqual(f.read(), zipf.read("2020-03-21 document B.jpg")) - - def test_download_default(self): - response = self.client.post( - self.ENDPOINT, - json.dumps({"documents": [self.doc2.id, self.doc3.id]}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 2) - self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) - self.assertIn("2020-03-21 document B.pdf", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) - - with self.doc3.archive_file as f: - self.assertEqual(f.read(), zipf.read("2020-03-21 document B.pdf")) - - def test_download_both(self): - response = self.client.post( - self.ENDPOINT, - json.dumps({"documents": [self.doc2.id, self.doc3.id], "content": "both"}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 3) - self.assertIn("originals/2021-01-01 document A.pdf", zipf.namelist()) - self.assertIn("archive/2020-03-21 document B.pdf", zipf.namelist()) - self.assertIn("originals/2020-03-21 document B.jpg", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual( - f.read(), - zipf.read("originals/2021-01-01 document A.pdf"), - ) - - with self.doc3.archive_file as f: - self.assertEqual( - f.read(), - zipf.read("archive/2020-03-21 document B.pdf"), - ) - - with self.doc3.source_file as f: - self.assertEqual( - f.read(), - zipf.read("originals/2020-03-21 document B.jpg"), - ) - - def test_filename_clashes(self): - response = self.client.post( - self.ENDPOINT, - json.dumps({"documents": [self.doc2.id, self.doc2b.id]}), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 2) - - self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) - self.assertIn("2021-01-01 document A_01.pdf", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) - - with self.doc2b.source_file as f: - self.assertEqual(f.read(), zipf.read("2021-01-01 document A_01.pdf")) - - def test_compression(self): - self.client.post( - self.ENDPOINT, - json.dumps( - {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}, - ), - content_type="application/json", - ) - - @override_settings(FILENAME_FORMAT="{correspondent}/{title}") - def test_formatted_download_originals(self): - """ - GIVEN: - - Defined file naming format - WHEN: - - Bulk download request for original documents - - Bulk download request requests to follow format - THEN: - - Files defined in resulting zipfile are formatted - """ - - c = Correspondent.objects.create(name="test") - c2 = Correspondent.objects.create(name="a space name") - - self.doc2.correspondent = c - self.doc2.title = "This is Doc 2" - self.doc2.save() - - self.doc3.correspondent = c2 - self.doc3.title = "Title 2 - Doc 3" - self.doc3.save() - - response = self.client.post( - self.ENDPOINT, - json.dumps( - { - "documents": [self.doc2.id, self.doc3.id], - "content": "originals", - "follow_formatting": True, - }, - ), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 2) - self.assertIn("a space name/Title 2 - Doc 3.jpg", zipf.namelist()) - self.assertIn("test/This is Doc 2.pdf", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual(f.read(), zipf.read("test/This is Doc 2.pdf")) - - with self.doc3.source_file as f: - self.assertEqual( - f.read(), - zipf.read("a space name/Title 2 - Doc 3.jpg"), - ) - - @override_settings(FILENAME_FORMAT="somewhere/{title}") - def test_formatted_download_archive(self): - """ - GIVEN: - - Defined file naming format - WHEN: - - Bulk download request for archive documents - - Bulk download request requests to follow format - THEN: - - Files defined in resulting zipfile are formatted - """ - - self.doc2.title = "This is Doc 2" - self.doc2.save() - - self.doc3.title = "Title 2 - Doc 3" - self.doc3.save() - print(self.doc3.archive_path) - print(self.doc3.archive_filename) - - response = self.client.post( - self.ENDPOINT, - json.dumps( - { - "documents": [self.doc2.id, self.doc3.id], - "follow_formatting": True, - }, - ), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 2) - self.assertIn("somewhere/This is Doc 2.pdf", zipf.namelist()) - self.assertIn("somewhere/Title 2 - Doc 3.pdf", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual(f.read(), zipf.read("somewhere/This is Doc 2.pdf")) - - with self.doc3.archive_file as f: - self.assertEqual(f.read(), zipf.read("somewhere/Title 2 - Doc 3.pdf")) - - @override_settings(FILENAME_FORMAT="{document_type}/{title}") - def test_formatted_download_both(self): - """ - GIVEN: - - Defined file naming format - WHEN: - - Bulk download request for original documents and archive documents - - Bulk download request requests to follow format - THEN: - - Files defined in resulting zipfile are formatted - """ - - dc1 = DocumentType.objects.create(name="bill") - dc2 = DocumentType.objects.create(name="statement") - - self.doc2.document_type = dc1 - self.doc2.title = "This is Doc 2" - self.doc2.save() - - self.doc3.document_type = dc2 - self.doc3.title = "Title 2 - Doc 3" - self.doc3.save() - - response = self.client.post( - self.ENDPOINT, - json.dumps( - { - "documents": [self.doc2.id, self.doc3.id], - "content": "both", - "follow_formatting": True, - }, - ), - content_type="application/json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response["Content-Type"], "application/zip") - - with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: - self.assertEqual(len(zipf.filelist), 3) - self.assertIn("originals/bill/This is Doc 2.pdf", zipf.namelist()) - self.assertIn("archive/statement/Title 2 - Doc 3.pdf", zipf.namelist()) - self.assertIn("originals/statement/Title 2 - Doc 3.jpg", zipf.namelist()) - - with self.doc2.source_file as f: - self.assertEqual( - f.read(), - zipf.read("originals/bill/This is Doc 2.pdf"), - ) - - with self.doc3.archive_file as f: - self.assertEqual( - f.read(), - zipf.read("archive/statement/Title 2 - Doc 3.pdf"), - ) - - with self.doc3.source_file as f: - self.assertEqual( - f.read(), - zipf.read("originals/statement/Title 2 - Doc 3.jpg"), - ) - - class TestApiRemoteVersion(DirectoriesMixin, APITestCase): ENDPOINT = "/api/remote_version/" diff --git a/src/documents/tests/test_api_bulk_download.py b/src/documents/tests/test_api_bulk_download.py new file mode 100644 index 000000000..57912c65c --- /dev/null +++ b/src/documents/tests/test_api_bulk_download.py @@ -0,0 +1,337 @@ +import datetime +import io +import json +import os +import shutil +import zipfile + +from django.contrib.auth.models import User +from django.test import override_settings +from django.utils import timezone +from rest_framework import status +from rest_framework.test import APITestCase + +from documents.models import Correspondent +from documents.models import Document +from documents.models import DocumentType +from documents.tests.utils import DirectoriesMixin + + +class TestBulkDownload(DirectoriesMixin, APITestCase): + ENDPOINT = "/api/documents/bulk_download/" + + def setUp(self): + super().setUp() + + user = User.objects.create_superuser(username="temp_admin") + self.client.force_authenticate(user=user) + + self.doc1 = Document.objects.create(title="unrelated", checksum="A") + self.doc2 = Document.objects.create( + title="document A", + filename="docA.pdf", + mime_type="application/pdf", + checksum="B", + created=timezone.make_aware(datetime.datetime(2021, 1, 1)), + ) + self.doc2b = Document.objects.create( + title="document A", + filename="docA2.pdf", + mime_type="application/pdf", + checksum="D", + created=timezone.make_aware(datetime.datetime(2021, 1, 1)), + ) + self.doc3 = Document.objects.create( + title="document B", + filename="docB.jpg", + mime_type="image/jpeg", + checksum="C", + created=timezone.make_aware(datetime.datetime(2020, 3, 21)), + archive_filename="docB.pdf", + archive_checksum="D", + ) + + shutil.copy( + os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), + self.doc2.source_path, + ) + shutil.copy( + os.path.join(os.path.dirname(__file__), "samples", "simple.png"), + self.doc2b.source_path, + ) + shutil.copy( + os.path.join(os.path.dirname(__file__), "samples", "simple.jpg"), + self.doc3.source_path, + ) + shutil.copy( + os.path.join(os.path.dirname(__file__), "samples", "test_with_bom.pdf"), + self.doc3.archive_path, + ) + + def test_download_originals(self): + response = self.client.post( + self.ENDPOINT, + json.dumps( + {"documents": [self.doc2.id, self.doc3.id], "content": "originals"}, + ), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 2) + self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) + self.assertIn("2020-03-21 document B.jpg", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) + + with self.doc3.source_file as f: + self.assertEqual(f.read(), zipf.read("2020-03-21 document B.jpg")) + + def test_download_default(self): + response = self.client.post( + self.ENDPOINT, + json.dumps({"documents": [self.doc2.id, self.doc3.id]}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 2) + self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) + self.assertIn("2020-03-21 document B.pdf", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) + + with self.doc3.archive_file as f: + self.assertEqual(f.read(), zipf.read("2020-03-21 document B.pdf")) + + def test_download_both(self): + response = self.client.post( + self.ENDPOINT, + json.dumps({"documents": [self.doc2.id, self.doc3.id], "content": "both"}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 3) + self.assertIn("originals/2021-01-01 document A.pdf", zipf.namelist()) + self.assertIn("archive/2020-03-21 document B.pdf", zipf.namelist()) + self.assertIn("originals/2020-03-21 document B.jpg", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual( + f.read(), + zipf.read("originals/2021-01-01 document A.pdf"), + ) + + with self.doc3.archive_file as f: + self.assertEqual( + f.read(), + zipf.read("archive/2020-03-21 document B.pdf"), + ) + + with self.doc3.source_file as f: + self.assertEqual( + f.read(), + zipf.read("originals/2020-03-21 document B.jpg"), + ) + + def test_filename_clashes(self): + response = self.client.post( + self.ENDPOINT, + json.dumps({"documents": [self.doc2.id, self.doc2b.id]}), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 2) + + self.assertIn("2021-01-01 document A.pdf", zipf.namelist()) + self.assertIn("2021-01-01 document A_01.pdf", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual(f.read(), zipf.read("2021-01-01 document A.pdf")) + + with self.doc2b.source_file as f: + self.assertEqual(f.read(), zipf.read("2021-01-01 document A_01.pdf")) + + def test_compression(self): + self.client.post( + self.ENDPOINT, + json.dumps( + {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}, + ), + content_type="application/json", + ) + + @override_settings(FILENAME_FORMAT="{correspondent}/{title}") + def test_formatted_download_originals(self): + """ + GIVEN: + - Defined file naming format + WHEN: + - Bulk download request for original documents + - Bulk download request requests to follow format + THEN: + - Files in resulting zipfile are formatted + """ + + c = Correspondent.objects.create(name="test") + c2 = Correspondent.objects.create(name="a space name") + + self.doc2.correspondent = c + self.doc2.title = "This is Doc 2" + self.doc2.save() + + self.doc3.correspondent = c2 + self.doc3.title = "Title 2 - Doc 3" + self.doc3.save() + + response = self.client.post( + self.ENDPOINT, + json.dumps( + { + "documents": [self.doc2.id, self.doc3.id], + "content": "originals", + "follow_formatting": True, + }, + ), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 2) + self.assertIn("a space name/Title 2 - Doc 3.jpg", zipf.namelist()) + self.assertIn("test/This is Doc 2.pdf", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual(f.read(), zipf.read("test/This is Doc 2.pdf")) + + with self.doc3.source_file as f: + self.assertEqual( + f.read(), + zipf.read("a space name/Title 2 - Doc 3.jpg"), + ) + + @override_settings(FILENAME_FORMAT="somewhere/{title}") + def test_formatted_download_archive(self): + """ + GIVEN: + - Defined file naming format + WHEN: + - Bulk download request for archive documents + - Bulk download request requests to follow format + THEN: + - Files in resulting zipfile are formatted + """ + + self.doc2.title = "This is Doc 2" + self.doc2.save() + + self.doc3.title = "Title 2 - Doc 3" + self.doc3.save() + print(self.doc3.archive_path) + print(self.doc3.archive_filename) + + response = self.client.post( + self.ENDPOINT, + json.dumps( + { + "documents": [self.doc2.id, self.doc3.id], + "follow_formatting": True, + }, + ), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 2) + self.assertIn("somewhere/This is Doc 2.pdf", zipf.namelist()) + self.assertIn("somewhere/Title 2 - Doc 3.pdf", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual(f.read(), zipf.read("somewhere/This is Doc 2.pdf")) + + with self.doc3.archive_file as f: + self.assertEqual(f.read(), zipf.read("somewhere/Title 2 - Doc 3.pdf")) + + @override_settings(FILENAME_FORMAT="{document_type}/{title}") + def test_formatted_download_both(self): + """ + GIVEN: + - Defined file naming format + WHEN: + - Bulk download request for original documents and archive documents + - Bulk download request requests to follow format + THEN: + - Files defined in resulting zipfile are formatted + """ + + dc1 = DocumentType.objects.create(name="bill") + dc2 = DocumentType.objects.create(name="statement") + + self.doc2.document_type = dc1 + self.doc2.title = "This is Doc 2" + self.doc2.save() + + self.doc3.document_type = dc2 + self.doc3.title = "Title 2 - Doc 3" + self.doc3.save() + + response = self.client.post( + self.ENDPOINT, + json.dumps( + { + "documents": [self.doc2.id, self.doc3.id], + "content": "both", + "follow_formatting": True, + }, + ), + content_type="application/json", + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response["Content-Type"], "application/zip") + + with zipfile.ZipFile(io.BytesIO(response.content)) as zipf: + self.assertEqual(len(zipf.filelist), 3) + self.assertIn("originals/bill/This is Doc 2.pdf", zipf.namelist()) + self.assertIn("archive/statement/Title 2 - Doc 3.pdf", zipf.namelist()) + self.assertIn("originals/statement/Title 2 - Doc 3.jpg", zipf.namelist()) + + with self.doc2.source_file as f: + self.assertEqual( + f.read(), + zipf.read("originals/bill/This is Doc 2.pdf"), + ) + + with self.doc3.archive_file as f: + self.assertEqual( + f.read(), + zipf.read("archive/statement/Title 2 - Doc 3.pdf"), + ) + + with self.doc3.source_file as f: + self.assertEqual( + f.read(), + zipf.read("originals/statement/Title 2 - Doc 3.jpg"), + )