diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md
index 30687680c..e7b263ed8 100644
--- a/docs/advanced_usage.md
+++ b/docs/advanced_usage.md
@@ -418,6 +418,15 @@ Insurances/                             # Insurances
     Defining a storage path is optional. If no storage path is defined for a
     document, the global [`PAPERLESS_FILENAME_FORMAT`](configuration.md#PAPERLESS_FILENAME_FORMAT) is applied.
 
+## Automatic recovery of invalid PDFs {#pdf-recovery}
+
+Paperless will attempt to repair ("clean") certain invalid PDFs with `qpdf` before processing them, for example when a
+file named `*.pdf` is not detected as a PDF by its MIME type. This can happen if the PDF is malformed or contains errors.
+
+!!! warning
+
+    Because `qpdf` rewrites the file, this process technically modifies the document before it is processed.
+
 ## Celery Monitoring {#celery-monitoring}
 
 The monitoring tool
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index 57277e4a6..0bc335b8b 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -539,6 +539,29 @@ class ConsumerPlugin(
 
             self.log.debug(f"Detected mime type: {mime_type}")
 
+            if (
+                Path(self.filename).suffix.lower() == ".pdf"
+                and mime_type in settings.CONSUMER_PDF_RECOVERABLE_MIME_TYPES
+            ):
+                try:
+                    # Probably a PDF with a misdetected mime type: qpdf rewrites the
+                    # working copy in place and the type is then detected again.
+                    self.log.debug(
+                        "Detected possible PDF with wrong mime type, trying to clean with qpdf",
+                    )
+                    run_subprocess(
+                        [
+                            "qpdf",
+                            "--replace-input",
+                            self.working_copy,
+                        ],
+                        logger=self.log,
+                    )
+                    mime_type = magic.from_file(self.working_copy, mime=True)
+                    self.log.debug(f"Detected mime type after qpdf: {mime_type}")
+                except Exception as e:
+                    self.log.error(f"Error attempting to clean PDF: {e}")
+
             # Based on the mime type, get the parser for that type
             parser_class: Optional[type[DocumentParser]] = (
                 get_parser_class_for_mime_type(
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 737d1256f..30f3dd26d 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -1389,9 +1389,18 @@ class PostDocumentSerializer(serializers.Serializer):
         mime_type = magic.from_buffer(document_data, mime=True)
 
         if not is_mime_type_supported(mime_type):
-            raise serializers.ValidationError(
-                _("File type %(type)s not supported") % {"type": mime_type},
-            )
+            # A damaged PDF can be sniffed as a generic type. Let it through when the
+            # file name ends in ".pdf" (compared case-insensitively, like the suffix
+            # check in the consumer) so the consumer can attempt a qpdf repair later;
+            # every other unsupported type is still rejected.
+            is_recoverable_pdf = (
+                mime_type in settings.CONSUMER_PDF_RECOVERABLE_MIME_TYPES
+                and document.name.lower().endswith(".pdf")
+            )
+            if not is_recoverable_pdf:
+                raise serializers.ValidationError(
+                    _("File type %(type)s not supported") % {"type": mime_type},
+                )
 
         return document.name, document_data
 
diff --git a/src/documents/tests/samples/invalid_pdf.pdf b/src/documents/tests/samples/invalid_pdf.pdf
new file mode 100644
index 000000000..f226c2d84
Binary files /dev/null and b/src/documents/tests/samples/invalid_pdf.pdf differ
diff --git a/src/documents/tests/test_api_documents.py b/src/documents/tests/test_api_documents.py
index ee2e8ee1e..b1cd43932 100644
--- a/src/documents/tests/test_api_documents.py
+++ b/src/documents/tests/test_api_documents.py
@@ -1402,6 +1402,27 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
         self.assertEqual(overrides.filename, "simple.pdf")
         self.assertEqual(overrides.custom_field_ids, [custom_field.id])
 
+    def test_upload_invalid_pdf(self):
+        """
+        GIVEN: Invalid PDF named "*.pdf" whose mime_type is in settings.CONSUMER_PDF_RECOVERABLE_MIME_TYPES
+        WHEN: The file is uploaded
+        THEN: The file is not rejected
+        """
+        self.consume_file_mock.return_value = celery.result.AsyncResult(
+            id=str(uuid.uuid4()),
+        )
+
+        with open(
+            os.path.join(os.path.dirname(__file__), "samples", "invalid_pdf.pdf"),
+            "rb",
+        ) as f:
+            response = self.client.post(
+                "/api/documents/post_document/",
+                {"document": f},
+            )
+
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+
     def test_get_metadata(self):
         doc = Document.objects.create(
             title="test",
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
index 5b56e2cca..aa452e15b 100644
--- a/src/documents/tests/test_consumer.py
+++ b/src/documents/tests/test_consumer.py
@@ -235,6 +235,8 @@ class FaultyGenericExceptionParser(_BaseTestParser):
 
 def fake_magic_from_file(file, mime=False):
     if mime:
+        if file.name.startswith("invalid_pdf"):
+            return "application/octet-stream"
         if os.path.splitext(file)[1] == ".pdf":
             return "application/pdf"
         elif os.path.splitext(file)[1] == ".png":
@@ -952,6 +954,27 @@ class TestConsumer(
 
         sanity_check()
 
+    @mock.patch("documents.consumer.run_subprocess")
+    def test_try_to_clean_invalid_pdf(self, m):
+        shutil.copy(
+            Path(__file__).parent / "samples" / "invalid_pdf.pdf",
+            settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
+        )
+        with self.get_consumer(
+            settings.CONSUMPTION_DIR / "invalid_pdf.pdf",
+        ) as consumer:
+            # qpdf is mocked out, so the file is never repaired and consumption fails
+            self.assertRaises(ConsumerError, consumer.run)
+
+            m.assert_called_once()
+
+            args, _ = m.call_args
+
+            command = args[0]
+
+            self.assertEqual(command[0], "qpdf")
+            self.assertEqual(command[1], "--replace-input")
+
 
 @mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
 class TestConsumerCreatedDate(DirectoriesMixin, GetConsumerMixin, TestCase):
diff --git a/src/paperless/settings.py b/src/paperless/settings.py
index 851fe6217..2da0b49f1 100644
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@@ -960,6 +960,8 @@ CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = __get_boolean(
     "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
 )
 
+CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
+
 OCR_PAGES = __get_optional_int("PAPERLESS_OCR_PAGES")
 
 # The default language that tesseract will attempt to use when parsing