From 865856b06dca06647108945303814d89b317bce9 Mon Sep 17 00:00:00 2001 From: s0llvan <178677095+s0llvan@users.noreply.github.com> Date: Sat, 21 Sep 2024 18:18:19 +0000 Subject: [PATCH] Feature: number of pages of document in documents list --- src-ui/messages.xlf | 112 +++++++++++------- .../saved-view-widget.component.spec.ts | 2 + .../document-card-large.component.html | 6 + .../document-card-large.component.spec.ts | 2 + .../document-card-small.component.html | 34 ++++-- .../document-card-small.component.spec.ts | 5 + .../document-list.component.html | 14 +++ .../document-list.component.spec.ts | 6 +- src-ui/src/app/data/document.ts | 8 ++ src-ui/src/app/services/settings.service.ts | 1 + src/documents/consumer.py | 11 +- src/documents/index.py | 3 + .../migrations/1053_document_pages_count.py | 109 +++++++++++++++++ src/documents/models.py | 13 ++ src/documents/parsers.py | 3 + src/documents/serialisers.py | 5 + src/documents/views.py | 19 +++ src/paperless_tesseract/parsers.py | 9 ++ src/paperless_tesseract/tests/test_parser.py | 14 +++ 19 files changed, 318 insertions(+), 58 deletions(-) create mode 100644 src/documents/migrations/1053_document_pages_count.py diff --git a/src-ui/messages.xlf b/src-ui/messages.xlf index dec952685..7c0ec396d 100644 --- a/src-ui/messages.xlf +++ b/src-ui/messages.xlf @@ -1046,11 +1046,11 @@ src/app/data/document.ts - 62 + 63 src/app/data/document.ts - 95 + 100 @@ -1954,11 +1954,11 @@ src/app/data/document.ts - 38 + 39 src/app/data/document.ts - 92 + 97 @@ -2414,7 +2414,7 @@ src/app/components/document-list/document-card-small/document-card-small.component.html - 120 + 128 src/app/components/manage/custom-fields/custom-fields.component.html @@ -2760,7 +2760,7 @@ src/app/data/document.ts - 46 + 47 @@ -2972,7 +2972,7 @@ src/app/components/document-list/document-card-small/document-card-small.component.html - 131 + 139 @@ -3361,11 +3361,11 @@ src/app/data/document.ts - 42 + 43 src/app/data/document.ts - 93 + 98 @@ -5514,7 +5514,7 @@ 
src/app/components/document-list/document-list.component.html - 277 + 286 @@ -5529,7 +5529,7 @@ src/app/components/document-list/document-list.component.html - 312 + 321 @@ -5544,7 +5544,7 @@ src/app/components/document-list/document-list.component.html - 319 + 328 @@ -5842,11 +5842,11 @@ src/app/data/document.ts - 34 + 35 src/app/data/document.ts - 90 + 95 @@ -5883,11 +5883,11 @@ src/app/data/document.ts - 50 + 51 src/app/data/document.ts - 89 + 94 @@ -5910,11 +5910,11 @@ src/app/data/document.ts - 54 + 55 src/app/data/document.ts - 91 + 96 @@ -5937,7 +5937,7 @@ src/app/data/document.ts - 58 + 59 @@ -6730,7 +6730,7 @@ src/app/components/document-list/document-list.component.html - 288 + 297 @@ -6785,19 +6785,30 @@ 82,83 + + {VAR_PLURAL, plural, =1 {1 page} other { pages}} + + src/app/components/document-list/document-card-large/document-card-large.component.html + 117 + + + src/app/components/document-list/document-card-small/document-card-small.component.html + 95 + + Shared src/app/components/document-list/document-card-large/document-card-large.component.html - 121 + 127 src/app/components/document-list/document-card-small/document-card-small.component.html - 106 + 114 src/app/data/document.ts - 70 + 71 src/app/pipes/username.pipe.ts @@ -6808,7 +6819,7 @@ Score: src/app/components/document-list/document-card-large/document-card-large.component.html - 126 + 132 @@ -6947,11 +6958,11 @@ src/app/data/document.ts - 74 + 75 src/app/data/document.ts - 88 + 93 @@ -6983,11 +6994,11 @@ src/app/data/document.ts - 66 + 67 src/app/data/document.ts - 96 + 101 @@ -7025,25 +7036,51 @@ 243 + + Sort by number of pages + + src/app/components/document-list/document-list.component.html + 252 + + + + Pages + + src/app/components/document-list/document-list.component.html + 256 + + + src/app/data/document.ts + 79 + + + src/app/data/document.ts + 102 + + + src/app/data/paperless-config.ts + 90 + + Shared src/app/components/document-list/document-list.component.html - 250,252 + 259,261 
Edit document src/app/components/document-list/document-list.component.html - 284 + 293 Yes src/app/components/document-list/document-list.component.html - 335 + 349 src/app/pipes/yes-no.pipe.ts @@ -7054,7 +7091,7 @@ No src/app/components/document-list/document-list.component.html - 335 + 349 src/app/pipes/yes-no.pipe.ts @@ -7988,14 +8025,14 @@ Modified src/app/data/document.ts - 94 + 99 Search score src/app/data/document.ts - 102 + 108 Score is a value returned by the full text search engine and specifies how well a result matches the given query @@ -8111,13 +8148,6 @@ 83 - - Pages - - src/app/data/paperless-config.ts - 90 - - Mode diff --git a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts index cb120bb64..a7fcd19fe 100644 --- a/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts +++ b/src-ui/src/app/components/dashboard/widgets/saved-view-widget/saved-view-widget.component.spec.ts @@ -65,6 +65,7 @@ const savedView: SavedView = { DisplayField.CORRESPONDENT, DisplayField.DOCUMENT_TYPE, DisplayField.STORAGE_PATH, + DisplayField.PAGES_COUNT, `${DisplayField.CUSTOM_FIELD}11` as any, `${DisplayField.CUSTOM_FIELD}15` as any, ], @@ -344,6 +345,7 @@ describe('SavedViewWidgetComponent', () => { expect(component.getColumnTitle(DisplayField.STORAGE_PATH)).toEqual( 'Storage path' ) + expect(component.getColumnTitle(DisplayField.PAGES_COUNT)).toEqual('Pages') }) it('should get correct column title for custom field', () => { diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html index 1a8c7df82..36501253f 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html +++ 
b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.html @@ -111,6 +111,12 @@ } } + @if (displayFields.includes(DisplayField.PAGES_COUNT) && document.pages_count) { +
+ + {document.pages_count, plural, =1 {1 page} other {{{document.pages_count}} pages}} +
+ } @if (displayFields.includes(DisplayField.OWNER) && document.owner && document.owner !== settingsService.currentUser.id) {
{{document.owner | username}} diff --git a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts index a3f047f03..841c74643 100644 --- a/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts +++ b/src-ui/src/app/components/document-list/document-card-large/document-card-large.component.spec.ts @@ -31,6 +31,7 @@ const doc = { correspondent: 8, document_type: 10, storage_path: null, + pages_count: 8, notes: [ { id: 11, @@ -80,6 +81,7 @@ describe('DocumentCardLargeComponent', () => { it('should display a document', () => { expect(fixture.nativeElement.textContent).toContain('Document 10') expect(fixture.nativeElement.textContent).toContain('Cupcake ipsum') + expect(fixture.nativeElement.textContent).toContain('8 pages') }) it('should show preview on mouseover after delay to preload content', fakeAsync(() => { diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html index 92449214e..7713c5b03 100644 --- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html +++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.html @@ -73,20 +73,28 @@
} - @if (displayFields.includes(DisplayField.ADDED)) { -
- -
- Created: {{ document.created | customDate }} - Added: {{ document.added | customDate }} - Modified: {{ document.modified | customDate }} -
-
-
- - {{document.added | customDate:'mediumDate'}} + @if (displayFields.includes(DisplayField.ADDED)) { +
+ +
+ Created: {{ document.created | customDate }} + Added: {{ document.added | customDate }} + Modified: {{ document.modified | customDate }} +
+
+
+ + {{document.added | customDate:'mediumDate'}} +
+
+ } + @if (displayFields.includes(DisplayField.PAGES_COUNT) && document.pages_count) { +
+
+ + {document.pages_count, plural, =1 {1 page} other {{{document.pages_count}} pages}} +
-
} @if (displayFields.includes(DisplayField.ASN) && document.archive_serial_number | isNumber) {
diff --git a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts index fc15453be..8cff34140 100644 --- a/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts +++ b/src-ui/src/app/components/document-list/document-card-small/document-card-small.component.spec.ts @@ -34,6 +34,7 @@ const doc = { correspondent: 8, document_type: 10, storage_path: null, + pages_count: 12, notes: [ { id: 11, @@ -91,6 +92,10 @@ describe('DocumentCardSmallComponent', () => { fixture.detectChanges() }) + it('should display a document', () => { + expect(fixture.nativeElement.textContent).toContain('12 pages') + }) + it('should display a document, limit tags to 5', () => { expect(fixture.nativeElement.textContent).toContain('Document 10') expect( diff --git a/src-ui/src/app/components/document-list/document-list.component.html b/src-ui/src/app/components/document-list/document-list.component.html index 368515970..ec2728865 100644 --- a/src-ui/src/app/components/document-list/document-list.component.html +++ b/src-ui/src/app/components/document-list/document-list.component.html @@ -246,6 +246,15 @@ (sort)="onSort($event)" i18n>Added } + @if (activeDisplayFields.includes(DisplayField.PAGES_COUNT)) { + Pages + } @if (activeDisplayFields.includes(DisplayField.SHARED)) { Shared @@ -330,6 +339,11 @@ {{d.added | customDate}} } + @if (activeDisplayFields.includes(DisplayField.PAGES_COUNT)) { + + {{ d.pages_count }} + + } @if (activeDisplayFields.includes(DisplayField.SHARED)) { @if (d.is_shared_by_requester) { Yes } @else { No } diff --git a/src-ui/src/app/components/document-list/document-list.component.spec.ts b/src-ui/src/app/components/document-list/document-list.component.spec.ts index 26758b3c0..ad85652b8 100644 --- a/src-ui/src/app/components/document-list/document-list.component.spec.ts +++ 
b/src-ui/src/app/components/document-list/document-list.component.spec.ts @@ -602,7 +602,7 @@ describe('DocumentListComponent', () => { expect( fixture.debugElement.queryAll(By.directive(SortableDirective)) - ).toHaveLength(9) + ).toHaveLength(10) expect(component.notesEnabled).toBeTruthy() settingsService.set(SETTINGS_KEYS.NOTES_ENABLED, false) @@ -610,14 +610,14 @@ describe('DocumentListComponent', () => { expect(component.notesEnabled).toBeFalsy() expect( fixture.debugElement.queryAll(By.directive(SortableDirective)) - ).toHaveLength(8) + ).toHaveLength(9) // insufficient perms jest.spyOn(permissionService, 'currentUserCan').mockReturnValue(false) fixture.detectChanges() expect( fixture.debugElement.queryAll(By.directive(SortableDirective)) - ).toHaveLength(4) + ).toHaveLength(5) }) it('should support toggle on document objects', () => { diff --git a/src-ui/src/app/data/document.ts b/src-ui/src/app/data/document.ts index 1571d2a53..ffa435c49 100644 --- a/src-ui/src/app/data/document.ts +++ b/src-ui/src/app/data/document.ts @@ -26,6 +26,7 @@ export enum DisplayField { OWNER = 'owner', SHARED = 'shared', ASN = 'asn', + PAGES_COUNT = 'pagescount', } export const DEFAULT_DISPLAY_FIELDS = [ @@ -73,6 +74,10 @@ export const DEFAULT_DISPLAY_FIELDS = [ id: DisplayField.ASN, name: $localize`ASN`, }, + { + id: DisplayField.PAGES_COUNT, + name: $localize`Pages`, + }, ] export const DEFAULT_DASHBOARD_VIEW_PAGE_SIZE = 10 @@ -94,6 +99,7 @@ export const DOCUMENT_SORT_FIELDS = [ { field: 'modified', name: $localize`Modified` }, { field: 'num_notes', name: $localize`Notes` }, { field: 'owner', name: $localize`Owner` }, + { field: 'pages_count', name: $localize`Pages` }, ] export const DOCUMENT_SORT_FIELDS_FULLTEXT = [ @@ -164,4 +170,6 @@ export interface Document extends ObjectWithPermissions { // write-only field remove_inbox_tags?: boolean + + pages_count?: number } diff --git a/src-ui/src/app/services/settings.service.ts b/src-ui/src/app/services/settings.service.ts index 
91d1cc320..2aab40ca9 100644 --- a/src-ui/src/app/services/settings.service.ts +++ b/src-ui/src/app/services/settings.service.ts @@ -345,6 +345,7 @@ export class SettingsService { DisplayField.CREATED, DisplayField.ADDED, DisplayField.ASN, + DisplayField.PAGES_COUNT, DisplayField.SHARED, ].includes(field.id) ) { diff --git a/src/documents/consumer.py b/src/documents/consumer.py index d90b88f5a..5099d5682 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -586,6 +586,7 @@ class ConsumerPlugin( date = None thumbnail = None archive_path = None + pages_count = None try: self._send_progress( @@ -621,6 +622,7 @@ class ConsumerPlugin( ) date = parse_date(self.filename, text) archive_path = document_parser.get_archive_path() + pages_count = document_parser.get_pages_count(self.working_copy, mime_type) except ParseError as e: document_parser.cleanup() @@ -662,7 +664,12 @@ class ConsumerPlugin( try: with transaction.atomic(): # store the document. - document = self._store(text=text, date=date, mime_type=mime_type) + document = self._store( + text=text, + date=date, + pages_count=pages_count, + mime_type=mime_type, + ) # If we get here, it was successful. Proceed with post-consume # hooks. If they fail, nothing will get changed. @@ -790,6 +797,7 @@ class ConsumerPlugin( self, text: str, date: Optional[datetime.datetime], + pages_count: int, mime_type: str, ) -> Document: # If someone gave us the original filename, use it instead of doc. 
@@ -835,6 +843,7 @@ class ConsumerPlugin( created=create_date, modified=create_date, storage_type=storage_type, + pages_count=pages_count, original_filename=self.filename, ) diff --git a/src/documents/index.py b/src/documents/index.py index d95a80213..c82c8bc73 100644 --- a/src/documents/index.py +++ b/src/documents/index.py @@ -80,6 +80,7 @@ def get_schema(): has_owner=BOOLEAN(), viewer_id=KEYWORD(commas=True), checksum=TEXT(), + pages_count=NUMERIC(sortable=True), original_filename=TEXT(sortable=True), is_shared=BOOLEAN(), ) @@ -181,6 +182,7 @@ def update_document(writer: AsyncWriter, doc: Document): has_owner=doc.owner is not None, viewer_id=viewer_ids if viewer_ids else None, checksum=doc.checksum, + pages_count=doc.pages_count, original_filename=doc.original_filename, is_shared=len(viewer_ids) > 0, ) @@ -247,6 +249,7 @@ class DelayedQuery: "archive_serial_number": "asn", "num_notes": "num_notes", "owner": "owner", + "pages_count": "pages_count", } if field.startswith("-"): diff --git a/src/documents/migrations/1053_document_pages_count.py b/src/documents/migrations/1053_document_pages_count.py new file mode 100644 index 000000000..25210f446 --- /dev/null +++ b/src/documents/migrations/1053_document_pages_count.py @@ -0,0 +1,109 @@ +# Generated by Django 4.2.16 on 2024-09-21 15:44 + +import datetime +from pathlib import Path + +import pikepdf +from django.conf import settings +from django.db import migrations +from django.db import models +from django.utils import timezone +from django.utils.termcolors import colorize as colourise + +from documents.parsers import get_default_file_extension + + +class Document: + """ + Django's migrations restrict access to model methods, so this is a snapshot + of the methods that existed at the time this migration was written, since + we need to make use of a lot of these shortcuts here. 
def add_number_of_pages_to_pages_count(apps, schema_editor):
    """
    Forward data migration: fill ``pages_count`` for every stored PDF document.

    Each document whose mime type is ``application/pdf`` is opened with
    pikepdf and the number of pages is written back to its row.

    The page count is optional metadata, so this step is best-effort:
    documents without a filename, and files that cannot be opened or parsed,
    are skipped (their ``pages_count`` stays NULL) instead of aborting the
    whole migration.

    Args:
        apps: historical app registry handed in by ``migrations.RunPython``.
        schema_editor: unused; part of the ``RunPython`` callable signature.
    """
    documentModel = apps.get_model("documents", "Document")

    # An empty queryset simply skips the loop, so no separate exists() query
    # is needed up front.
    for doc in documentModel.objects.filter(mime_type="application/pdf"):
        if not doc.filename:
            # Document.source_path yields None without a filename, so there
            # is no file on disk that could be inspected.
            continue

        document = Document(doc)

        print(
            " {} {} {}".format(
                colourise("*", fg="green"),
                colourise("Calculating number of pages for", fg="white"),
                colourise(document.filename, fg="cyan"),
            ),
        )

        try:
            # The context manager closes the PDF again right away; otherwise
            # one handle per document would stay open for the entire run.
            with pikepdf.open(document.source_path) as pdf:
                pages_count = len(pdf.pages)
        except Exception as e:
            # Deliberately broad: a single unreadable original must not
            # block the schema upgrade. Report it and move on.
            print(
                " {} {}".format(
                    colourise("!", fg="red"),
                    colourise(
                        f"Error retrieving number of pages for "
                        f"{document.filename}: {e}",
                        fg="white",
                    ),
                ),
            )
            continue

        doc.pages_count = pages_count
        # Only the new column changed; leave every other field untouched.
        doc.save(update_fields=["pages_count"])
add_number_of_pages_to_pages_count, + remove_number_of_pages_to_pages_count, + ), + ] diff --git a/src/documents/models.py b/src/documents/models.py index 3ee11aeba..23d68e734 100644 --- a/src/documents/models.py +++ b/src/documents/models.py @@ -205,6 +205,18 @@ class Document(SoftDeleteModel, ModelWithOwner): help_text=_("The checksum of the archived document."), ) + pages_count = models.PositiveIntegerField( + _("pages count"), + blank=False, + null=True, + unique=False, + db_index=False, + validators=[MinValueValidator(1)], + help_text=_( + "The number of pages of the document.", + ), + ) + created = models.DateTimeField(_("created"), default=timezone.now, db_index=True) modified = models.DateTimeField( @@ -414,6 +426,7 @@ class SavedView(ModelWithOwner): OWNER = ("owner", _("Owner")) SHARED = ("shared", _("Shared")) ASN = ("asn", _("ASN")) + PAGES_COUNT = ("pagescount", _("Pages")) CUSTOM_FIELD = ("custom_field_%d", ("Custom Field")) name = models.CharField(_("name"), max_length=128) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 1297162e2..8cb744b8a 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -367,6 +367,9 @@ class DocumentParser(LoggingMixin): def extract_metadata(self, document_path, mime_type): return [] + def get_pages_count(self, document_path, mime_type): + return None + def parse(self, document_path, mime_type, file_name=None): raise NotImplementedError diff --git a/src/documents/serialisers.py b/src/documents/serialisers.py index 747d744b6..49d0198dc 100644 --- a/src/documents/serialisers.py +++ b/src/documents/serialisers.py @@ -759,6 +759,7 @@ class DocumentSerializer( original_file_name = SerializerMethodField() archived_file_name = SerializerMethodField() created_date = serializers.DateField(required=False) + pages_count = SerializerMethodField() custom_fields = CustomFieldInstanceSerializer( many=True, @@ -779,6 +780,9 @@ class DocumentSerializer( required=False, ) + def get_pages_count(self, 
obj): + return obj.pages_count + def get_original_file_name(self, obj): return obj.original_filename @@ -894,6 +898,7 @@ class DocumentSerializer( "notes", "custom_fields", "remove_inbox_tags", + "pages_count", ) list_serializer_class = OwnedObjectListSerializer diff --git a/src/documents/views.py b/src/documents/views.py index a8a5bf97d..f66ad77aa 100644 --- a/src/documents/views.py +++ b/src/documents/views.py @@ -361,6 +361,7 @@ class DocumentViewSet( "archive_serial_number", "num_notes", "owner", + "pages_count", ) def get_queryset(self): @@ -444,6 +445,24 @@ class DocumentViewSet( logger.warning(f"No parser for {mime_type}") return [] + def get_pages_count(self, file, mime_type): + if not os.path.isfile(file): + return None + + parser_class = get_parser_class_for_mime_type(mime_type) + if parser_class: + parser = parser_class(progress_callback=None, logging_group=None) + + try: + return parser.get_pages_count(file) + except Exception: # pragma: no cover + logger.exception(f"Issue getting pages count for {file}") + # TODO: cover GPG errors, remove later. 
+ return [] + else: # pragma: no cover + logger.warning(f"No parser for {mime_type}") + return [] + def get_filesize(self, filename): if os.path.isfile(filename): return os.stat(filename).st_size diff --git a/src/paperless_tesseract/parsers.py b/src/paperless_tesseract/parsers.py index 4e92990f1..925427f51 100644 --- a/src/paperless_tesseract/parsers.py +++ b/src/paperless_tesseract/parsers.py @@ -41,6 +41,15 @@ class RasterisedDocumentParser(DocumentParser): """ return OcrConfig() + def get_pages_count(self, document_path, mime_type): + pages_count = None + if mime_type == "application/pdf": + import pikepdf + + pdf = pikepdf.open(document_path) + pages_count = len(pdf.pages) + return pages_count + def extract_metadata(self, document_path, mime_type): result = [] if mime_type == "application/pdf": diff --git a/src/paperless_tesseract/tests/test_parser.py b/src/paperless_tesseract/tests/test_parser.py index d63d965c5..b54c1ecd7 100644 --- a/src/paperless_tesseract/tests/test_parser.py +++ b/src/paperless_tesseract/tests/test_parser.py @@ -57,6 +57,20 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): self.assertContainsStrings(text.strip(), ["This is a test document."]) + def test_get_pages_count(self): + parser = RasterisedDocumentParser(uuid.uuid4()) + pages_count = parser.get_pages_count( + os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), + "application/pdf", + ) + self.assertEqual(pages_count, 1) + + pages_count = parser.get_pages_count( + os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), + "application/pdf", + ) + self.assertEqual(pages_count, 6) + def test_thumbnail(self): parser = RasterisedDocumentParser(uuid.uuid4()) thumb = parser.get_thumbnail(