Get storage paths working

This commit is contained in:
parent 3e5886584f
commit 1c042d4aaf
@@ -1,7 +1,7 @@
 import { SettingsService } from './services/settings.service'
 import { SETTINGS_KEYS } from './data/paperless-uisettings'
 import { Component, OnDestroy, OnInit, Renderer2 } from '@angular/core'
-import { Router } from '@angular/router'
+import { ActivatedRoute, Router } from '@angular/router'
 import { Subscription } from 'rxjs'
 import { ConsumerStatusService } from './services/consumer-status.service'
 import { ToastService } from './services/toast.service'
@@ -34,6 +34,7 @@ export class AppComponent implements OnInit, OnDestroy {
     private consumerStatusService: ConsumerStatusService,
     private toastService: ToastService,
     private router: Router,
+    private route: ActivatedRoute,
     private uploadDocumentsService: UploadDocumentsService,
     private tasksService: TasksService,
     public tourService: TourService,
@@ -265,7 +266,9 @@ export class AppComponent implements OnInit, OnDestroy {

   public dropped(files: NgxFileDropEntry[]) {
     this.fileLeave(true)
-    this.uploadDocumentsService.uploadFiles(files)
+    let storagePathId = parseInt(this.route.snapshot.queryParams['spid'])
+    storagePathId = !isNaN(storagePathId) ? storagePathId : undefined
+    this.uploadDocumentsService.uploadFiles(files, storagePathId)
     this.toastService.showInfo($localize`Initiating upload...`, 3000)
   }
 }
@@ -157,7 +157,6 @@ export class ExplorerComponent
     this.route.queryParamMap
       .pipe(takeUntil(this.unsubscribeNotifier))
       .subscribe((queryParams) => {
-        console.log('query params updated:', queryParams)
         this.list.loadFromQueryParams(queryParams)
         this.unmodifiedFilterRules = []
       })
@@ -181,7 +181,6 @@ export class StoragePathListViewService {
       )
       .subscribe({
         next: (result) => {
-          console.log('result:', result)
           this.initialized = true
           this.isReloading = false
           activeListViewState.collectionSize = result.count
@@ -21,13 +21,18 @@ export class UploadDocumentsService {
     private settings: SettingsService
   ) {}

-  uploadFiles(files: NgxFileDropEntry[]) {
+  uploadFiles(files: NgxFileDropEntry[], storagePathId?: number) {
     for (const droppedFile of files) {
       if (droppedFile.fileEntry.isFile) {
         const fileEntry = droppedFile.fileEntry as FileSystemFileEntry
         fileEntry.file((file: File) => {
           let formData = new FormData()
           formData.append('document', file, file.name)

+          if (storagePathId) {
+            formData.append('storage_path_id', storagePathId.toString())
+          }
+
           let status = this.consumerStatusService.newFileUpload(file.name)

           status.message = $localize`Connecting...`
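The server-side code that accepts this new storage_path_id form field is not visible here (the larger diffs in this commit are suppressed). As a purely hypothetical sketch, assuming the upload endpoint uses a Django REST Framework serializer (the names below are illustrative and not taken from this commit), the field could be read and turned into the metadata override object added further down in data_models.py:

# Hypothetical sketch only -- serializer and helper names are assumptions, not this commit's code.
from rest_framework import serializers

from documents.data_models import DocumentMetadataOverrides


class UploadSerializer(serializers.Serializer):
    document = serializers.FileField()
    # Optional override sent by the frontend as the 'storage_path_id' form field
    storage_path_id = serializers.IntegerField(required=False)


def overrides_from_upload(validated_data) -> DocumentMetadataOverrides:
    # Translate the uploaded form field into the override object consumed by the task below
    return DocumentMetadataOverrides(
        storage_path_id=validated_data.get("storage_path_id"),
    )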
File diff suppressed because it is too large
@@ -1,62 +1,63 @@
 import dataclasses
 import datetime
 import enum
 from pathlib import Path
 from typing import List
 from typing import Optional

 import magic


 @dataclasses.dataclass
 class DocumentMetadataOverrides:
     """
     Manages overrides for document fields which normally would
     be set from content or matching. All fields default to None,
     meaning no override is happening
     """

     filename: Optional[str] = None
     title: Optional[str] = None
     correspondent_id: Optional[int] = None
     document_type_id: Optional[int] = None
     tag_ids: Optional[List[int]] = None
     created: Optional[datetime.datetime] = None
     asn: Optional[int] = None
     owner_id: Optional[int] = None
+    storage_path_id: Optional[int] = None


 class DocumentSource(enum.IntEnum):
     """
     The source of an incoming document. May have other uses in the future
     """

     ConsumeFolder = enum.auto()
     ApiUpload = enum.auto()
     MailFetch = enum.auto()


 @dataclasses.dataclass
 class ConsumableDocument:
     """
     Encapsulates an incoming document, either from consume folder, API upload
     or mail fetching and certain useful operations on it.
     """

     source: DocumentSource
     original_file: Path
     mime_type: str = dataclasses.field(init=False, default=None)

     def __post_init__(self):
         """
         After a dataclass is initialized, this is called to finalize some data
         1. Make sure the original path is an absolute, fully qualified path
         2. Get the mime type of the file
         """
         # Always fully qualify the path first thing
         # Just in case, convert to a path if it's a str
         self.original_file = Path(self.original_file).resolve()

         # Get the file type once at init
         # Note this function isn't called when the object is unpickled
         self.mime_type = magic.from_file(self.original_file, mime=True)
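For orientation, a minimal usage sketch of the new field on the overrides dataclass (the id value is a placeholder):

from documents.data_models import DocumentMetadataOverrides

# Only the storage path override is set; every other field stays None,
# so the consumer keeps deriving those values from content/matching as before.
overrides = DocumentMetadataOverrides(storage_path_id=3)  # placeholder id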
File diff suppressed because it is too large
@@ -1,303 +1,304 @@
 import hashlib
 import logging
 import shutil
 import uuid
 from typing import Optional
 from typing import Type

 import tqdm
 from asgiref.sync import async_to_sync
 from celery import shared_task
 from channels.layers import get_channel_layer
 from django.conf import settings
 from django.db import transaction
 from django.db.models.signals import post_save
 from documents import barcodes
 from documents import index
 from documents import sanity_checker
 from documents.classifier import DocumentClassifier
 from documents.classifier import load_classifier
 from documents.consumer import Consumer
 from documents.consumer import ConsumerError
 from documents.data_models import ConsumableDocument
 from documents.data_models import DocumentMetadataOverrides
 from documents.data_models import DocumentSource
 from documents.file_handling import create_source_path_directory
 from documents.file_handling import generate_unique_filename
 from documents.models import Correspondent
 from documents.models import Document
 from documents.models import DocumentType
 from documents.models import StoragePath
 from documents.models import Tag
 from documents.parsers import DocumentParser
 from documents.parsers import get_parser_class_for_mime_type
 from documents.sanity_checker import SanityCheckFailedException
 from filelock import FileLock
 from redis.exceptions import ConnectionError
 from whoosh.writing import AsyncWriter


 logger = logging.getLogger("paperless.tasks")


 @shared_task
 def index_optimize():
     ix = index.open_index()
     writer = AsyncWriter(ix)
     writer.commit(optimize=True)


 def index_reindex(progress_bar_disable=False):
     documents = Document.objects.all()

     ix = index.open_index(recreate=True)

     with AsyncWriter(ix) as writer:
         for document in tqdm.tqdm(documents, disable=progress_bar_disable):
             index.update_document(writer, document)


 @shared_task
 def train_classifier():
     if (
         not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
         and not StoragePath.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
     ):

         return

     classifier = load_classifier()

     if not classifier:
         classifier = DocumentClassifier()

     try:
         if classifier.train():
             logger.info(
                 f"Saving updated classifier model to {settings.MODEL_FILE}...",
             )
             classifier.save()
         else:
             logger.debug("Training data unchanged.")

     except Exception as e:
         logger.warning("Classifier error: " + str(e))


 @shared_task
 def consume_file(
     input_doc: ConsumableDocument,
     overrides: Optional[DocumentMetadataOverrides] = None,
 ):

     # Default no overrides
     if overrides is None:
         overrides = DocumentMetadataOverrides()

     # read all barcodes in the current document
     if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
         doc_barcode_info = barcodes.scan_file_for_barcodes(
             input_doc.original_file,
             input_doc.mime_type,
         )

         # split document by separator pages, if enabled
         if settings.CONSUMER_ENABLE_BARCODES:
             separators = barcodes.get_separating_barcodes(doc_barcode_info.barcodes)

             if len(separators) > 0:
                 logger.debug(
                     f"Pages with separators found in: {input_doc.original_file}",
                 )
                 document_list = barcodes.separate_pages(
                     doc_barcode_info.pdf_path,
                     separators,
                 )

                 if document_list:

                     # If the file is an upload, it's in the scratch directory
                     # Move it to consume directory to be picked up
                     # Otherwise, use the current parent to keep possible tags
                     # from subdirectories
                     if input_doc.source != DocumentSource.ConsumeFolder:
                         save_to_dir = settings.CONSUMPTION_DIR
                     else:
                         # Note this uses the original file, because it's in the
                         # consume folder already and may include additional path
                         # components for tagging
                         # the .path is somewhere in scratch in this case
                         save_to_dir = input_doc.original_file.parent

                     for n, document in enumerate(document_list):
                         # save to consumption dir
                         # rename it to the original filename with number prefix
                         if overrides.filename is not None:
                             newname = f"{str(n)}_{overrides.filename}"
                         else:
                             newname = None

                         barcodes.save_to_dir(
                             document,
                             newname=newname,
                             target_dir=save_to_dir,
                         )

                         # Split file has been copied safely, remove it
                         document.unlink()

                     # And clean up the directory as well, now it's empty
                     shutil.rmtree(document_list[0].parent)

                     # This file has been split into multiple files without issue
                     # remove the original and working copy
                     input_doc.original_file.unlink()

                     # If the original file was a TIFF, remove the PDF generated from it
                     if input_doc.mime_type == "image/tiff":
                         logger.debug(
                             f"Deleting file {doc_barcode_info.pdf_path}",
                         )
                         doc_barcode_info.pdf_path.unlink()

                     # notify the sender, otherwise the progress bar
                     # in the UI stays stuck
                     payload = {
                         "filename": overrides.filename or input_doc.original_file.name,
                         "task_id": None,
                         "current_progress": 100,
                         "max_progress": 100,
                         "status": "SUCCESS",
                         "message": "finished",
                     }
                     try:
                         async_to_sync(get_channel_layer().group_send)(
                             "status_updates",
                             {"type": "status_update", "data": payload},
                         )
                     except ConnectionError as e:
                         logger.warning(f"ConnectionError on status send: {str(e)}")
                     # consuming stops here, since the original document with
                     # the barcodes has been split and will be consumed separately
                     return "File successfully split"

         # try reading the ASN from barcode
         if settings.CONSUMER_ENABLE_ASN_BARCODE:
             overrides.asn = barcodes.get_asn_from_barcodes(doc_barcode_info.barcodes)
             if overrides.asn:
                 logger.info(f"Found ASN in barcode: {overrides.asn}")

     # continue with consumption if no barcode was found
     document = Consumer().try_consume_file(
         input_doc.original_file,
         override_filename=overrides.filename,
         override_title=overrides.title,
         override_correspondent_id=overrides.correspondent_id,
         override_document_type_id=overrides.document_type_id,
         override_tag_ids=overrides.tag_ids,
         override_created=overrides.created,
         override_asn=overrides.asn,
         override_owner_id=overrides.owner_id,
+        override_storage_path_id=overrides.storage_path_id
     )

     if document:
         return f"Success. New document id {document.pk} created"
     else:
         raise ConsumerError(
             "Unknown error: Returned document was null, but "
             "no error message was given.",
         )


 @shared_task
 def sanity_check():
     messages = sanity_checker.check_sanity()

     messages.log_messages()

     if messages.has_error:
         raise SanityCheckFailedException("Sanity check failed with errors. See log.")
     elif messages.has_warning:
         return "Sanity check exited with warnings. See log."
     elif len(messages) > 0:
         return "Sanity check exited with infos. See log."
     else:
         return "No issues detected."


 @shared_task
 def bulk_update_documents(document_ids):
     documents = Document.objects.filter(id__in=document_ids)

     ix = index.open_index()

     for doc in documents:
         post_save.send(Document, instance=doc, created=False)

     with AsyncWriter(ix) as writer:
         for doc in documents:
             index.update_document(writer, doc)


 @shared_task
 def update_document_archive_file(document_id):
     """
     Re-creates the archive file of a document, including new OCR content and thumbnail
     """
     document = Document.objects.get(id=document_id)

     mime_type = document.mime_type

     parser_class: Type[DocumentParser] = get_parser_class_for_mime_type(mime_type)

     if not parser_class:
         logger.error(
             f"No parser found for mime type {mime_type}, cannot "
             f"archive document {document} (ID: {document_id})",
         )
         return

     parser: DocumentParser = parser_class(logging_group=uuid.uuid4())

     try:
         parser.parse(document.source_path, mime_type, document.get_public_filename())

         thumbnail = parser.get_thumbnail(
             document.source_path,
             mime_type,
             document.get_public_filename(),
         )

         if parser.get_archive_path():
             with transaction.atomic():
                 with open(parser.get_archive_path(), "rb") as f:
                     checksum = hashlib.md5(f.read()).hexdigest()
                 # I'm going to save first so that in case the file move
                 # fails, the database is rolled back.
                 # We also don't use save() since that triggers the filehandling
                 # logic, and we don't want that yet (file not yet in place)
                 document.archive_filename = generate_unique_filename(
                     document,
                     archive_filename=True,
                 )
                 Document.objects.filter(pk=document.pk).update(
                     archive_checksum=checksum,
                     content=parser.get_text(),
                     archive_filename=document.archive_filename,
                 )
             with FileLock(settings.MEDIA_LOCK):
                 create_source_path_directory(document.archive_path)
                 shutil.move(parser.get_archive_path(), document.archive_path)
                 shutil.move(thumbnail, document.thumbnail_path)

         with index.open_index_writer() as writer:
             index.update_document(writer, document)

     except Exception:
         logger.exception(
             f"Error while parsing document {document} (ID: {document_id})",
         )
     finally:
         parser.cleanup()
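Tying the pieces together, a hedged end-to-end sketch of queueing a consume with the new override. The module path for this tasks file, the file path, and the id are assumptions or placeholders, not values from this commit:

from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides
from documents.data_models import DocumentSource
from documents.tasks import consume_file  # assumed import path for the tasks module above

# consume_file() forwards overrides.storage_path_id to Consumer.try_consume_file()
# via override_storage_path_id, as shown in the diff above.
consume_file.delay(
    ConsumableDocument(
        source=DocumentSource.ApiUpload,
        original_file="/tmp/scan_0001.pdf",  # placeholder path; must exist, mime type is detected at init
    ),
    DocumentMetadataOverrides(storage_path_id=3),  # placeholder id
)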
File diff suppressed because it is too large