-
-
- Created: {{ document.created | customDate }}
- Added: {{ document.added | customDate }}
- Modified: {{ document.modified | customDate }}
-
-
-
-
-
{{document.added | customDate:'mediumDate'}}
+ @if (displayFields.includes(DisplayField.ADDED)) {
+
+
+
+ Created: {{ document.created | customDate }}
+ Added: {{ document.added | customDate }}
+ Modified: {{ document.modified | customDate }}
+
+
+
+
+ {{document.added | customDate:'mediumDate'}}
+
+
+ }
+ @if (displayFields.includes(DisplayField.PAGES_COUNT) && document.pages_count) {
+
+
+
+ {document.pages_count, plural, =1 {1 page} other {{{document.pages_count}} pages}}
+
-
}
@if (displayFields.includes(DisplayField.ASN) && document.archive_serial_number | isNumber) {
diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
index fc15453be..8cff34140 100644
--- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts
@@ -34,6 +34,7 @@ const doc = {
correspondent: 8,
document_type: 10,
storage_path: null,
+ pages_count: 12,
notes: [
{
id: 11,
@@ -91,6 +92,10 @@ describe('DocumentCardSmallComponent', () => {
fixture.detectChanges()
})
+  it('should display the page count', () => {
+    expect(fixture.nativeElement.textContent).toContain('12 pages') // fixture doc has pages_count: 12
+  })
+
it('should display a document, limit tags to 5', () => {
expect(fixture.nativeElement.textContent).toContain('Document 10')
expect(
diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html
index 368515970..ec2728865 100644
--- a/src-ui/src/app/components/document-list/document-list.component.html
+++ b/src-ui/src/app/components/document-list/document-list.component.html
@@ -246,6 +246,15 @@
(sort)="onSort($event)"
i18n>Added
}
+ @if (activeDisplayFields.includes(DisplayField.PAGES_COUNT)) {
+
Pages |
+ }
@if (activeDisplayFields.includes(DisplayField.SHARED)) {
Shared
@@ -330,6 +339,11 @@
{{d.added | customDate}}
}
+ @if (activeDisplayFields.includes(DisplayField.PAGES_COUNT)) {
+ |
+ {{ d.pages_count }}
+ |
+ }
@if (activeDisplayFields.includes(DisplayField.SHARED)) {
@if (d.is_shared_by_requester) { Yes } @else { No }
diff --git a/src-ui/src/app/components/document-list/document-list.component.spec.ts b/src-ui/src/app/components/document-list/document-list.component.spec.ts
index 26758b3c0..ad85652b8 100644
--- a/src-ui/src/app/components/document-list/document-list.component.spec.ts
+++ b/src-ui/src/app/components/document-list/document-list.component.spec.ts
@@ -602,7 +602,7 @@ describe('DocumentListComponent', () => {
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(9)
+ ).toHaveLength(10)
expect(component.notesEnabled).toBeTruthy()
settingsService.set(SETTINGS_KEYS.NOTES_ENABLED, false)
@@ -610,14 +610,14 @@ describe('DocumentListComponent', () => {
expect(component.notesEnabled).toBeFalsy()
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(8)
+ ).toHaveLength(9)
// insufficient perms
jest.spyOn(permissionService, 'currentUserCan').mockReturnValue(false)
fixture.detectChanges()
expect(
fixture.debugElement.queryAll(By.directive(SortableDirective))
- ).toHaveLength(4)
+ ).toHaveLength(5)
})
it('should support toggle on document objects', () => {
diff --git a/src-ui/src/app/data/document.ts b/src-ui/src/app/data/document.ts
index 1571d2a53..ffa435c49 100644
--- a/src-ui/src/app/data/document.ts
+++ b/src-ui/src/app/data/document.ts
@@ -26,6 +26,7 @@ export enum DisplayField {
OWNER = 'owner',
SHARED = 'shared',
ASN = 'asn',
+ PAGES_COUNT = 'pagescount',
}
export const DEFAULT_DISPLAY_FIELDS = [
@@ -73,6 +74,10 @@ export const DEFAULT_DISPLAY_FIELDS = [
id: DisplayField.ASN,
name: $localize`ASN`,
},
+ {
+ id: DisplayField.PAGES_COUNT,
+ name: $localize`Pages`,
+ },
]
export const DEFAULT_DASHBOARD_VIEW_PAGE_SIZE = 10
@@ -94,6 +99,7 @@ export const DOCUMENT_SORT_FIELDS = [
{ field: 'modified', name: $localize`Modified` },
{ field: 'num_notes', name: $localize`Notes` },
{ field: 'owner', name: $localize`Owner` },
+ { field: 'pages_count', name: $localize`Pages` },
]
export const DOCUMENT_SORT_FIELDS_FULLTEXT = [
@@ -164,4 +170,6 @@ export interface Document extends ObjectWithPermissions {
// write-only field
remove_inbox_tags?: boolean
+
+ pages_count?: number
}
diff --git a/src-ui/src/app/services/settings.service.ts b/src-ui/src/app/services/settings.service.ts
index 91d1cc320..2aab40ca9 100644
--- a/src-ui/src/app/services/settings.service.ts
+++ b/src-ui/src/app/services/settings.service.ts
@@ -345,6 +345,7 @@ export class SettingsService {
DisplayField.CREATED,
DisplayField.ADDED,
DisplayField.ASN,
+ DisplayField.PAGES_COUNT,
DisplayField.SHARED,
].includes(field.id)
) {
diff --git a/src/documents/consumer.py b/src/documents/consumer.py
index d90b88f5a..5099d5682 100644
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@@ -586,6 +586,7 @@ class ConsumerPlugin(
date = None
thumbnail = None
archive_path = None
+ pages_count = None
try:
self._send_progress(
@@ -621,6 +622,7 @@ class ConsumerPlugin(
)
date = parse_date(self.filename, text)
archive_path = document_parser.get_archive_path()
+ pages_count = document_parser.get_pages_count(self.working_copy, mime_type)
except ParseError as e:
document_parser.cleanup()
@@ -662,7 +664,12 @@ class ConsumerPlugin(
try:
with transaction.atomic():
# store the document.
- document = self._store(text=text, date=date, mime_type=mime_type)
+ document = self._store(
+ text=text,
+ date=date,
+ pages_count=pages_count,
+ mime_type=mime_type,
+ )
# If we get here, it was successful. Proceed with post-consume
# hooks. If they fail, nothing will get changed.
@@ -790,6 +797,7 @@ class ConsumerPlugin(
self,
text: str,
date: Optional[datetime.datetime],
+ pages_count: Optional[int],  # None when the parser reports no page count
mime_type: str,
) -> Document:
# If someone gave us the original filename, use it instead of doc.
@@ -835,6 +843,7 @@ class ConsumerPlugin(
created=create_date,
modified=create_date,
storage_type=storage_type,
+ pages_count=pages_count,
original_filename=self.filename,
)
diff --git a/src/documents/index.py b/src/documents/index.py
index d95a80213..c82c8bc73 100644
--- a/src/documents/index.py
+++ b/src/documents/index.py
@@ -80,6 +80,7 @@ def get_schema():
has_owner=BOOLEAN(),
viewer_id=KEYWORD(commas=True),
checksum=TEXT(),
+ pages_count=NUMERIC(sortable=True),
original_filename=TEXT(sortable=True),
is_shared=BOOLEAN(),
)
@@ -181,6 +182,7 @@ def update_document(writer: AsyncWriter, doc: Document):
has_owner=doc.owner is not None,
viewer_id=viewer_ids if viewer_ids else None,
checksum=doc.checksum,
+ pages_count=doc.pages_count,
original_filename=doc.original_filename,
is_shared=len(viewer_ids) > 0,
)
@@ -247,6 +249,7 @@ class DelayedQuery:
"archive_serial_number": "asn",
"num_notes": "num_notes",
"owner": "owner",
+ "pages_count": "pages_count",
}
if field.startswith("-"):
diff --git a/src/documents/migrations/1053_document_pages_count.py b/src/documents/migrations/1053_document_pages_count.py
new file mode 100644
index 000000000..25210f446
--- /dev/null
+++ b/src/documents/migrations/1053_document_pages_count.py
@@ -0,0 +1,110 @@
+# Generated by Django 4.2.16 on 2024-09-21 15:44
+
+import datetime
+from pathlib import Path
+
+import django.core.validators
+import pikepdf
+from django.conf import settings
+from django.db import migrations
+from django.db import models
+from django.utils import timezone
+from django.utils.termcolors import colorize as colourise
+
+from documents.parsers import get_default_file_extension
+
+
+class Document:
+    """
+    Plain-Python stand-in for the ``Document`` model. Migrations cannot call
+    model methods, so the helpers needed here are copied as they existed when
+    this migration was written.
+    """
+
+    def __init__(self, doc):
+        self.pk = doc.pk
+        self.correspondent = doc.correspondent
+        self.title = doc.title
+        self.mime_type = doc.mime_type
+        self.filename = doc.filename
+        self.created = doc.created
+
+    def __str__(self) -> str:
+        # Database times are UTC; render the creation date in local time.
+        local_day = timezone.localdate(self.created)
+        parts = [datetime.date.isoformat(local_day)]
+        if self.correspondent:
+            parts.append(f"{self.correspondent}")
+        if self.title:
+            parts.append(f"{self.title}")
+        return " ".join(parts)
+
+    @property
+    def file_type(self):
+        """Whatever get_default_file_extension returns for this mime type."""
+        return get_default_file_extension(self.mime_type)
+
+    @property
+    def source_path(self) -> Path:
+        """Resolved path under ORIGINALS_DIR; None when no filename is set."""
+        if not self.filename:
+            return None
+        return (settings.ORIGINALS_DIR / Path(str(self.filename))).resolve()
+
+
+def add_number_of_pages_to_pages_count(apps, schema_editor):
+    """Backfill ``pages_count`` for every stored PDF document."""
+    documentModel = apps.get_model("documents", "Document")
+
+    for doc in documentModel.objects.filter(mime_type="application/pdf"):
+        document = Document(doc)
+        print(
+            " {} {} {}".format(
+                colourise("*", fg="green"),
+                colourise("Calculating number of pages for", fg="white"),
+                colourise(document.filename, fg="cyan"),
+            ),
+        )
+
+        try:
+            # Close each PDF after counting so file handles do not accumulate.
+            with pikepdf.open(document.source_path) as pdf:
+                doc.pages_count = len(pdf.pages)
+            doc.save()
+        except Exception as e:
+            # A missing or unreadable original must not abort the migration.
+            print(f"Error retrieving number of pages for {document.filename}: {e}")
+
+
+def remove_number_of_pages_to_pages_count(apps, schema_editor):
+    """Reverse step: reset the backfilled page counts of PDF documents to NULL."""
+    documentModel = apps.get_model("documents", "Document")
+
+    # NULL is the field's "unknown" value; 0 is not a valid page count. A
+    # single UPDATE also avoids loading and saving every document one by one.
+    documentModel.objects.filter(mime_type="application/pdf").update(
+        pages_count=None,
+    )
+
+
+class Migration(migrations.Migration):
+    dependencies = [("documents", "1052_document_transaction_id")]
+
+    operations = [
+        migrations.AddField(
+            model_name="document",
+            name="pages_count",
+            # Mirrors the model field's non-default options so that
+            # makemigrations sees no difference from this migration's state.
+            field=models.PositiveIntegerField(
+                help_text="The number of pages of the document.",
+                null=True,
+                validators=[django.core.validators.MinValueValidator(1)],
+                verbose_name="pages count",
+            ),
+        ),
+        migrations.RunPython(
+            add_number_of_pages_to_pages_count,
+            remove_number_of_pages_to_pages_count,
+        ),
+    ]
diff --git a/src/documents/models.py b/src/documents/models.py
index 3ee11aeba..23d68e734 100644
--- a/src/documents/models.py
+++ b/src/documents/models.py
@@ -205,6 +205,18 @@ class Document(SoftDeleteModel, ModelWithOwner):
help_text=_("The checksum of the archived document."),
)
+ pages_count = models.PositiveIntegerField(
+ _("pages count"),
+ blank=False,
+ null=True,
+ unique=False,
+ db_index=False,
+ validators=[MinValueValidator(1)],
+ help_text=_(
+ "The number of pages of the document.",
+ ),
+ )
+
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
modified = models.DateTimeField(
@@ -414,6 +426,7 @@ class SavedView(ModelWithOwner):
OWNER = ("owner", _("Owner"))
SHARED = ("shared", _("Shared"))
ASN = ("asn", _("ASN"))
+ PAGES_COUNT = ("pagescount", _("Pages"))
CUSTOM_FIELD = ("custom_field_%d", ("Custom Field"))
name = models.CharField(_("name"), max_length=128)
diff --git a/src/documents/parsers.py b/src/documents/parsers.py
index 1297162e2..8cb744b8a 100644
--- a/src/documents/parsers.py
+++ b/src/documents/parsers.py
@@ -367,6 +367,9 @@ class DocumentParser(LoggingMixin):
def extract_metadata(self, document_path, mime_type):
return []
+ def get_pages_count(self, document_path, mime_type):  # base implementation: always returns None
+ return None
+
def parse(self, document_path, mime_type, file_name=None):
raise NotImplementedError
diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py
index 747d744b6..49d0198dc 100644
--- a/src/documents/serialisers.py
+++ b/src/documents/serialisers.py
@@ -759,6 +759,7 @@ class DocumentSerializer(
original_file_name = SerializerMethodField()
archived_file_name = SerializerMethodField()
created_date = serializers.DateField(required=False)
+ pages_count = SerializerMethodField()
custom_fields = CustomFieldInstanceSerializer(
many=True,
@@ -779,6 +780,9 @@ class DocumentSerializer(
required=False,
)
+ def get_pages_count(self, obj):  # value for the `pages_count` SerializerMethodField
+ return obj.pages_count
+
def get_original_file_name(self, obj):
return obj.original_filename
@@ -894,6 +898,7 @@ class DocumentSerializer(
"notes",
"custom_fields",
"remove_inbox_tags",
+ "pages_count",
)
list_serializer_class = OwnedObjectListSerializer
diff --git a/src/documents/views.py b/src/documents/views.py
index a8a5bf97d..f66ad77aa 100644
--- a/src/documents/views.py
+++ b/src/documents/views.py
@@ -361,6 +361,7 @@ class DocumentViewSet(
"archive_serial_number",
"num_notes",
"owner",
+ "pages_count",
)
def get_queryset(self):
@@ -444,6 +445,24 @@ class DocumentViewSet(
logger.warning(f"No parser for {mime_type}")
return []
+    def get_pages_count(self, file, mime_type):
+        """Return the page count of ``file``, or None if it cannot be determined."""
+        if not os.path.isfile(file):
+            return None
+
+        parser_class = get_parser_class_for_mime_type(mime_type)
+        if not parser_class:  # pragma: no cover
+            logger.warning(f"No parser for {mime_type}")
+            return None
+
+        parser = parser_class(progress_callback=None, logging_group=None)
+        try:
+            # Parser signature is (document_path, mime_type); both are required.
+            return parser.get_pages_count(file, mime_type)
+        except Exception:  # pragma: no cover
+            logger.exception(f"Issue getting pages count for {file}")
+            return None  # TODO: cover GPG errors, remove later.
+
def get_filesize(self, filename):
if os.path.isfile(filename):
return os.stat(filename).st_size
diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py
index 4e92990f1..925427f51 100644
--- a/src/paperless_tesseract/parsers.py
+++ b/src/paperless_tesseract/parsers.py
@@ -41,6 +41,15 @@ class RasterisedDocumentParser(DocumentParser):
"""
return OcrConfig()
+    def get_pages_count(self, document_path, mime_type):
+        """Return the page count of a PDF, or None for any other mime type."""
+        if mime_type != "application/pdf":
+            return None
+        import pikepdf
+
+        with pikepdf.open(document_path) as pdf:  # closes the file handle
+            return len(pdf.pages)
+
def extract_metadata(self, document_path, mime_type):
result = []
if mime_type == "application/pdf":
diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py
index d63d965c5..b54c1ecd7 100644
--- a/src/paperless_tesseract/tests/test_parser.py
+++ b/src/paperless_tesseract/tests/test_parser.py
@@ -57,6 +57,20 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.assertContainsStrings(text.strip(), ["This is a test document."])
+    def test_get_pages_count(self):
+        """Sample PDFs report the expected number of pages."""
+        parser = RasterisedDocumentParser(uuid.uuid4())
+        expected_counts = {
+            "simple-digital.pdf": 1,
+            "multi-page-mixed.pdf": 6,
+        }
+        for sample_name, expected in expected_counts.items():
+            pages_count = parser.get_pages_count(
+                os.path.join(self.SAMPLE_FILES, sample_name),
+                "application/pdf",
+            )
+            self.assertEqual(pages_count, expected)
+
def test_thumbnail(self):
parser = RasterisedDocumentParser(uuid.uuid4())
thumb = parser.get_thumbnail(
|