diff --git a/src-ui/src/app/app.component.ts b/src-ui/src/app/app.component.ts index e2cc46fe0..c8f5f5c15 100644 --- a/src-ui/src/app/app.component.ts +++ b/src-ui/src/app/app.component.ts @@ -268,7 +268,8 @@ export class AppComponent implements OnInit, OnDestroy { this.fileLeave(true) let storagePathId = parseInt(this.route.snapshot.queryParams['spid']) storagePathId = !isNaN(storagePathId) ? storagePathId : undefined - this.uploadDocumentsService.uploadFiles(files, { storagePathId }) + const isUploadWithFolders = files.every(f => 'fullPath' in f.fileEntry && typeof f.fileEntry.fullPath === 'string' && (f.fileEntry.fullPath as string).split('/').filter(s => !!s).length > 1) + this.uploadDocumentsService.uploadFiles(files, { storagePathId, isUploadWithFolders }) this.toastService.showInfo($localize`Initiating upload...`, 3000) } } diff --git a/src/documents/consumer.py b/src/documents/consumer.py index c7d1a1c39..c9cb5958b 100644 --- a/src/documents/consumer.py +++ b/src/documents/consumer.py @@ -607,8 +607,22 @@ class Consumer(LoggingMixin): ) if self.full_path: + # e.g. full_path: "/CDV#3500648756/OR#1161.pdf" + # e.g. ['CDV#3500648756'] folders = self.full_path.split('/')[:-1] + # remove empty values from splitting the leading slash folders = [i for i in folders if i] + + # e.g. user dropped the file in storage path id 26 + # which is "folder_test" or "test/test2/test3" + if document.storage_path: + # e.g. ['test', 'test2', 'test3'] + parent_folders = document.storage_path.path.split('/') + # just double check that there are no empty values from leading slashes + parent_folders = [i for i in parent_folders if i] + # e.g. 
"test/test2/test3/CDV#3500648756" + folders = parent_folders + folders + folder_path = '/'.join(folders) print(f'folder_path: {folder_path}') diff --git a/src/documents/file_handling.py b/src/documents/file_handling.py index c046ae15a..8575a7513 100644 --- a/src/documents/file_handling.py +++ b/src/documents/file_handling.py @@ -1,248 +1,252 @@ -import logging -import os -from collections import defaultdict -from pathlib import PurePath - -import pathvalidate -from django.conf import settings -from django.template.defaultfilters import slugify -from django.utils import timezone -from documents.models import Document - -logger = logging.getLogger("paperless.filehandling") - - -class defaultdictNoStr(defaultdict): - def __str__(self): - raise ValueError("Don't use {tags} directly.") - - -def create_source_path_directory(source_path): - os.makedirs(os.path.dirname(source_path), exist_ok=True) - - -def delete_empty_directories(directory, root): - if not os.path.isdir(directory): - return - - # Go up in the directory hierarchy and try to delete all directories - directory = os.path.normpath(directory) - root = os.path.normpath(root) - - if not directory.startswith(root + os.path.sep): - # don't do anything outside our originals folder. - - # append os.path.set so that we avoid these cases: - # directory = /home/originals2/test - # root = /home/originals ("/" gets appended and startswith fails) - return - - while directory != root: - if not os.listdir(directory): - # it's empty - try: - os.rmdir(directory) - except OSError: - # whatever. empty directories aren't that bad anyway. - return - else: - # it's not empty. 
- return - - # go one level up - directory = os.path.normpath(os.path.dirname(directory)) - - -def many_to_dictionary(field): - # Converts ManyToManyField to dictionary by assuming, that field - # entries contain an _ or - which will be used as a delimiter - mydictionary = dict() - - for index, t in enumerate(field.all()): - # Populate tag names by index - mydictionary[index] = slugify(t.name) - - # Find delimiter - delimiter = t.name.find("_") - - if delimiter == -1: - delimiter = t.name.find("-") - - if delimiter == -1: - continue - - key = t.name[:delimiter] - value = t.name[delimiter + 1 :] - - mydictionary[slugify(key)] = slugify(value) - - return mydictionary - - -def generate_unique_filename(doc, archive_filename=False): - """ - Generates a unique filename for doc in settings.ORIGINALS_DIR. - - The returned filename is guaranteed to be either the current filename - of the document if unchanged, or a new filename that does not correspondent - to any existing files. The function will append _01, _02, etc to the - filename before the extension to avoid conflicts. - - If archive_filename is True, return a unique archive filename instead. - - """ - if archive_filename: - old_filename = doc.archive_filename - root = settings.ARCHIVE_DIR - else: - old_filename = doc.filename - root = settings.ORIGINALS_DIR - - # If generating archive filenames, try to make a name that is similar to - # the original filename first. - - if archive_filename and doc.filename: - new_filename = os.path.splitext(doc.filename)[0] + ".pdf" - if new_filename == old_filename or not os.path.exists( - os.path.join(root, new_filename), - ): - return new_filename - - counter = 0 - - while True: - new_filename = generate_filename( - doc, - counter, - archive_filename=archive_filename, - ) - if new_filename == old_filename: - # still the same as before. 
- return new_filename - - if os.path.exists(os.path.join(root, new_filename)): - counter += 1 - else: - return new_filename - - -def generate_filename( - doc: Document, - counter=0, - append_gpg=True, - archive_filename=False, -): - path = "" - filename_format = settings.FILENAME_FORMAT - - try: - if doc.storage_path is not None: - logger.debug( - f"Document has storage_path {doc.storage_path.id} " - f"({doc.storage_path.path}) set", - ) - filename_format = doc.storage_path.path - - if filename_format is not None: - tags = defaultdictNoStr( - lambda: slugify(None), - many_to_dictionary(doc.tags), - ) - - tag_list = pathvalidate.sanitize_filename( - ",".join( - sorted(tag.name for tag in doc.tags.all()), - ), - replacement_text="-", - ) - - no_value_default = "-none-" - - if doc.correspondent: - correspondent = pathvalidate.sanitize_filename( - doc.correspondent.name, - replacement_text="-", - ) - else: - correspondent = no_value_default - - if doc.document_type: - document_type = pathvalidate.sanitize_filename( - doc.document_type.name, - replacement_text="-", - ) - else: - document_type = no_value_default - - if doc.archive_serial_number: - asn = str(doc.archive_serial_number) - else: - asn = no_value_default - - if doc.owner is not None: - owner_username_str = str(doc.owner.username) - else: - owner_username_str = no_value_default - - if doc.original_filename is not None: - # No extension - original_name = PurePath(doc.original_filename).with_suffix("").name - else: - original_name = no_value_default - - # Convert UTC database datetime to localized date - local_added = timezone.localdate(doc.added) - local_created = timezone.localdate(doc.created) - - path = filename_format.format( - title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"), - correspondent=correspondent, - document_type=document_type, - created=local_created.isoformat(), - created_year=local_created.strftime("%Y"), - created_year_short=local_created.strftime("%y"), - 
created_month=local_created.strftime("%m"), - created_month_name=local_created.strftime("%B"), - created_month_name_short=local_created.strftime("%b"), - created_day=local_created.strftime("%d"), - added=local_added.isoformat(), - added_year=local_added.strftime("%Y"), - added_year_short=local_added.strftime("%y"), - added_month=local_added.strftime("%m"), - added_month_name=local_added.strftime("%B"), - added_month_name_short=local_added.strftime("%b"), - added_day=local_added.strftime("%d"), - asn=asn, - tags=tags, - tag_list=tag_list, - owner_username=owner_username_str, - original_name=original_name, - ).strip() - - if settings.FILENAME_FORMAT_REMOVE_NONE: - path = path.replace("-none-/", "") # remove empty directories - path = path.replace(" -none-", "") # remove when spaced, with space - path = path.replace("-none-", "") # remove rest of the occurences - - path = path.replace("-none-", "none") # backward compatibility - path = path.strip(os.sep) - - except (ValueError, KeyError, IndexError): - logger.warning( - f"Invalid filename_format '{filename_format}', falling back to default", - ) - - counter_str = f"_{counter:02}" if counter else "" - - filetype_str = ".pdf" if archive_filename else doc.file_type - - if len(path) > 0: - filename = f"{path}{counter_str}{filetype_str}" - else: - filename = f"{doc.pk:07}{counter_str}{filetype_str}" - - # Append .gpg for encrypted files - if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG: - filename += ".gpg" - - return filename +import logging +import os +from collections import defaultdict +from pathlib import PurePath +# import uuid + +import pathvalidate +from django.conf import settings +from django.template.defaultfilters import slugify +from django.utils import timezone +from documents.models import Document + +logger = logging.getLogger("paperless.filehandling") + + +class defaultdictNoStr(defaultdict): + def __str__(self): + raise ValueError("Don't use {tags} directly.") + + +def 
create_source_path_directory(source_path): + os.makedirs(os.path.dirname(source_path), exist_ok=True) + + +def delete_empty_directories(directory, root): + if not os.path.isdir(directory): + return + + # Go up in the directory hierarchy and try to delete all directories + directory = os.path.normpath(directory) + root = os.path.normpath(root) + + if not directory.startswith(root + os.path.sep): + # don't do anything outside our originals folder. + + # append os.path.set so that we avoid these cases: + # directory = /home/originals2/test + # root = /home/originals ("/" gets appended and startswith fails) + return + + while directory != root: + if not os.listdir(directory): + # it's empty + try: + os.rmdir(directory) + except OSError: + # whatever. empty directories aren't that bad anyway. + return + else: + # it's not empty. + return + + # go one level up + directory = os.path.normpath(os.path.dirname(directory)) + + +def many_to_dictionary(field): + # Converts ManyToManyField to dictionary by assuming, that field + # entries contain an _ or - which will be used as a delimiter + mydictionary = dict() + + for index, t in enumerate(field.all()): + # Populate tag names by index + mydictionary[index] = slugify(t.name) + + # Find delimiter + delimiter = t.name.find("_") + + if delimiter == -1: + delimiter = t.name.find("-") + + if delimiter == -1: + continue + + key = t.name[:delimiter] + value = t.name[delimiter + 1 :] + + mydictionary[slugify(key)] = slugify(value) + + return mydictionary + + +def generate_unique_filename(doc, archive_filename=False): + """ + Generates a unique filename for doc in settings.ORIGINALS_DIR. + + The returned filename is guaranteed to be either the current filename + of the document if unchanged, or a new filename that does not correspondent + to any existing files. The function will append _01, _02, etc to the + filename before the extension to avoid conflicts. + + If archive_filename is True, return a unique archive filename instead. 
+
+    """
+    if archive_filename:
+        old_filename = doc.archive_filename
+        root = settings.ARCHIVE_DIR
+    else:
+        old_filename = doc.filename
+        root = settings.ORIGINALS_DIR
+
+    # If generating archive filenames, try to make a name that is similar to
+    # the original filename first.
+
+    if archive_filename and doc.filename:
+        new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
+        if new_filename == old_filename or not os.path.exists(
+            os.path.join(root, new_filename),
+        ):
+            return new_filename
+
+    counter = 0
+
+    while True:
+        new_filename = generate_filename(
+            doc,
+            counter,
+            archive_filename=archive_filename,
+        )
+        if new_filename == old_filename:
+            # still the same as before.
+            return new_filename
+
+        if os.path.exists(os.path.join(root, new_filename)):
+            counter += 1
+        else:
+            return new_filename
+    # new_filename = str(uuid.uuid4()) + ".pdf"
+    # if not os.path.exists(os.path.join(root, new_filename)):
+    #     return new_filename
+
+
+def generate_filename(
+    doc: Document,
+    counter=0,
+    append_gpg=True,
+    archive_filename=False,
+):
+    path = ""
+    filename_format = settings.FILENAME_FORMAT
+
+    try:
+        if doc.storage_path is not None:
+            logger.debug(
+                f"Document has storage_path {doc.storage_path.id} "
+                f"({doc.storage_path.path}) set",
+            )
+            filename_format = doc.storage_path.path + '/' + (doc.original_filename or "{title}")
+
+        if filename_format is not None:
+            tags = defaultdictNoStr(
+                lambda: slugify(None),
+                many_to_dictionary(doc.tags),
+            )
+
+            tag_list = pathvalidate.sanitize_filename(
+                ",".join(
+                    sorted(tag.name for tag in doc.tags.all()),
+                ),
+                replacement_text="-",
+            )
+
+            no_value_default = "-none-"
+
+            if doc.correspondent:
+                correspondent = pathvalidate.sanitize_filename(
+                    doc.correspondent.name,
+                    replacement_text="-",
+                )
+            else:
+                correspondent = no_value_default
+
+            if doc.document_type:
+                document_type = pathvalidate.sanitize_filename(
+                    doc.document_type.name,
+                    replacement_text="-",
+                )
+            else:
+                document_type = no_value_default
+
+            if 
doc.archive_serial_number: + asn = str(doc.archive_serial_number) + else: + asn = no_value_default + + if doc.owner is not None: + owner_username_str = str(doc.owner.username) + else: + owner_username_str = no_value_default + + if doc.original_filename is not None: + # No extension + original_name = PurePath(doc.original_filename).with_suffix("").name + else: + original_name = no_value_default + + # Convert UTC database datetime to localized date + local_added = timezone.localdate(doc.added) + local_created = timezone.localdate(doc.created) + + path = filename_format.format( + title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"), + correspondent=correspondent, + document_type=document_type, + created=local_created.isoformat(), + created_year=local_created.strftime("%Y"), + created_year_short=local_created.strftime("%y"), + created_month=local_created.strftime("%m"), + created_month_name=local_created.strftime("%B"), + created_month_name_short=local_created.strftime("%b"), + created_day=local_created.strftime("%d"), + added=local_added.isoformat(), + added_year=local_added.strftime("%Y"), + added_year_short=local_added.strftime("%y"), + added_month=local_added.strftime("%m"), + added_month_name=local_added.strftime("%B"), + added_month_name_short=local_added.strftime("%b"), + added_day=local_added.strftime("%d"), + asn=asn, + tags=tags, + tag_list=tag_list, + owner_username=owner_username_str, + original_name=original_name, + ).strip() + + if settings.FILENAME_FORMAT_REMOVE_NONE: + path = path.replace("-none-/", "") # remove empty directories + path = path.replace(" -none-", "") # remove when spaced, with space + path = path.replace("-none-", "") # remove rest of the occurences + + path = path.replace("-none-", "none") # backward compatibility + path = path.strip(os.sep) + + except (ValueError, KeyError, IndexError): + logger.warning( + f"Invalid filename_format '{filename_format}', falling back to default", + ) + + counter_str = f"_{counter:02}" if 
counter else ""
+
+    filetype_str = ".pdf" if archive_filename else doc.file_type
+
+    if len(path) > 0:
+        filename = f"{path}{counter_str}{filetype_str}"
+    else:
+        filename = f"{doc.pk:07}{counter_str}{filetype_str}"
+
+    # Append .gpg for encrypted files
+    if append_gpg and doc.storage_type == doc.STORAGE_TYPE_GPG:
+        filename += ".gpg"
+
+    return filename
diff --git a/src/google_cloud_storage/storage.py b/src/google_cloud_storage/storage.py
index af1db1267..88677ad02 100644
--- a/src/google_cloud_storage/storage.py
+++ b/src/google_cloud_storage/storage.py
@@ -30,20 +30,26 @@ def upload_file(source, target):
         blob.upload_from_file(read_file_2)
 
 def get_file_from_gcs(bucket_path):
-    if (not client) or (not bucket):
-        raise Exception("Google Cloud Storage is not initialized.")
+    try:
+        if (not client) or (not bucket):
+            raise Exception("Google Cloud Storage is not initialized.")
 
-    # print("Getting blob from Google Cloud Storage")
-    # Create a blob object representing the path in the bucket
-    blob = bucket.blob(str(bucket_path))
+        # print("Getting blob from Google Cloud Storage")
+        # Create a blob object representing the path in the bucket
+        blob = bucket.blob(str(bucket_path))
 
-    # Download the file as a byte array
-    byte_stream = BytesIO()
-    # print("Downloading file from Google Cloud Storage")
-    blob.download_to_file(byte_stream)
+        # Download the file as a byte array
+        byte_stream = BytesIO()
+        # print("Downloading file from Google Cloud Storage")
+        blob.download_to_file(byte_stream)
 
-    # Seek to the start of the byte stream to allow reading from the beginning
-    byte_stream.seek(0)
+        # Seek to the start of the byte stream to allow reading from the beginning
+        byte_stream.seek(0)
 
-    # print("Returning downloaded file to caller")
-    return byte_stream
\ No newline at end of file
+        # print("Returning downloaded file to caller")
+        return byte_stream
+    except Exception:
+        return None
+
+def exists():
+    return False
\ No newline at end of file