feat: add get file from gcs

GCS = Google Cloud Storage
This commit is contained in:
Martin Tan 2023-09-25 22:54:54 +08:00
parent 81ce4e4597
commit 892b033cdd
4 changed files with 68 additions and 23 deletions

View File

@ -172,7 +172,7 @@ export class DocumentDetailComponent
}
get folderPath(): string {
return this.storagePaths.find(s => s.id === this.document.storage_path)?.path ?? '';
return this.storagePaths?.find(s => s.id === this.document?.storage_path)?.path ?? '';
}
getContentType() {
@ -402,6 +402,7 @@ export class DocumentDetailComponent
}
updateComponent(doc: PaperlessDocument) {
console.log('[updateComponent] doc:', doc);
this.document = doc
this.requiresPassword = false
this.documentsService

View File

@ -37,7 +37,7 @@ from .parsers import ParseError
from .signals import document_consumption_finished
from .signals import document_consumption_started
from google.cloud import storage
from google_cloud_storage.storage import upload_file, is_gcs_enabled
class ConsumerError(Exception):
@ -433,16 +433,6 @@ class Consumer(LoggingMixin):
classifier = load_classifier()
try:
self.log("debug", "Initializing Google Cloud Storage: " + str(settings.GCP_SERVICE_ACCOUNT_JSON))
# Prepare Google Cloud Storage client
# client = storage.Client()
client = storage.Client.from_service_account_info(settings.GCP_SERVICE_ACCOUNT_JSON)
self.log("debug", "Getting bucket: " + settings.GCP_BUCKET_NAME)
self.bucket = client.bucket(settings.GCP_BUCKET_NAME)
except Exception as e:
self.log("warning", 'Failed to initialize GCP: ' + str(e))
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
# now that everything is done, we can start to store the document
# in the system. This will be a transaction and reasonably fast.
@ -635,18 +625,12 @@ class Consumer(LoggingMixin):
def _write(self, storage_type, source, target):
with open(source, "rb") as read_file, open(target, "wb") as write_file:
write_file.write(read_file.read())
if is_gcs_enabled:
upload_file(source, target)
else:
with open(source, "rb") as read_file, open(target, "wb") as write_file:
write_file.write(read_file.read())
with open(source, "rb") as read_file_2:
self.log("debug", "GOOGLE_CLOUD_STORAGE:" + str(settings.GOOGLE_CLOUD_STORAGE))
# Reference: https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
if settings.GOOGLE_CLOUD_STORAGE:
self.log("debug", "Uploading to Google Cloud Storage")
# GCP was initialized earlier
blob = self.bucket.blob(str(target))
# Reference: https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
blob.upload_from_file(read_file_2)
def _log_script_outputs(self, completed_process: CompletedProcess):
"""

View File

@ -19,6 +19,8 @@ from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from documents.parsers import get_default_file_extension
from google_cloud_storage.storage import is_gcs_enabled, get_file_from_gcs
ALL_STATES = sorted(states.ALL_STATES)
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
@ -312,6 +314,9 @@ class Document(ModelWithOwner):
@property
def source_file(self):
    """
    Hand back a readable binary handle on the original document.

    With ``is_gcs_enabled`` truthy the result is whatever
    ``get_file_from_gcs`` returns for ``self.source_path``; otherwise it is
    the local file at ``self.source_path`` opened in ``"rb"`` mode.
    """
    if not is_gcs_enabled:
        return open(self.source_path, "rb")
    return get_file_from_gcs(self.source_path)
@property
@ -327,6 +332,9 @@ class Document(ModelWithOwner):
@property
def archive_file(self):
    """
    Hand back a readable binary handle on the archived version.

    With ``is_gcs_enabled`` truthy the result is whatever
    ``get_file_from_gcs`` returns for ``self.archive_path``; otherwise it is
    the local file at ``self.archive_path`` opened in ``"rb"`` mode.
    """
    if not is_gcs_enabled:
        return open(self.archive_path, "rb")
    return get_file_from_gcs(self.archive_path)
def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
@ -364,6 +372,9 @@ class Document(ModelWithOwner):
@property
def thumbnail_file(self):
    """
    Hand back a readable binary handle on the thumbnail.

    With ``is_gcs_enabled`` truthy the result is whatever
    ``get_file_from_gcs`` returns for ``self.thumbnail_path``; otherwise it
    is the local file at ``self.thumbnail_path`` opened in ``"rb"`` mode.
    """
    if not is_gcs_enabled:
        return open(self.thumbnail_path, "rb")
    return get_file_from_gcs(self.thumbnail_path)
@property

View File

@ -0,0 +1,49 @@
from google.cloud import storage
from django.conf import settings
from io import BytesIO
# Feature flag read once at import time; other modules import this name to
# decide between local disk and the bucket.
is_gcs_enabled = settings.GOOGLE_CLOUD_STORAGE

# Both stay None while the feature is off; upload_file / get_file_from_gcs
# check them before touching the bucket.
client = None
bucket = None

if is_gcs_enabled:
    # Deliberately NOT printing settings.GCP_SERVICE_ACCOUNT_JSON here: it is
    # the credential passed to from_service_account_info and must not end up
    # in stdout or captured logs.
    print("Initializing Google Cloud Storage")
    # Build the client from the service-account info held in settings
    # (rather than relying on ambient credentials via storage.Client()).
    client = storage.Client.from_service_account_info(settings.GCP_SERVICE_ACCOUNT_JSON)
    print("Getting bucket: " + settings.GCP_BUCKET_NAME)
    bucket = client.bucket(settings.GCP_BUCKET_NAME)
def upload_file(source, target):
    """
    Send the local file at ``source`` to the bucket as blob ``str(target)``.

    Does nothing (returns ``None``) when the module-level ``client`` or
    ``bucket`` is falsy.
    """
    if not (client and bucket):
        return

    with open(source, "rb") as local_file:
        # Pattern taken from
        # https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
        print("Uploading to Google Cloud Storage")
        destination = bucket.blob(str(target))
        # API reference:
        # https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
        destination.upload_from_file(local_file)
def get_file_from_gcs(bucket_path):
    """
    Download the blob named ``str(bucket_path)`` into memory.

    Returns:
        A ``BytesIO`` holding the downloaded content, positioned at offset 0.

    Raises:
        Exception: when the module-level ``client`` or ``bucket`` is falsy.
    """
    if not (client and bucket):
        raise Exception("Google Cloud Storage is not initialized.")

    remote = bucket.blob(str(bucket_path))

    # The whole object is buffered in memory; nothing is written to disk.
    buffer = BytesIO()
    remote.download_to_file(buffer)

    # download_to_file leaves the cursor at the end; rewind so the caller
    # reads from the first byte.
    buffer.seek(0)
    return buffer