feat: add get file from gcs
GCS = Google Cloud Storage
This commit is contained in:
parent
81ce4e4597
commit
892b033cdd
@ -172,7 +172,7 @@ export class DocumentDetailComponent
|
||||
}
|
||||
|
||||
get folderPath(): string {
|
||||
return this.storagePaths.find(s => s.id === this.document.storage_path)?.path ?? '';
|
||||
return this.storagePaths?.find(s => s.id === this.document?.storage_path)?.path ?? '';
|
||||
}
|
||||
|
||||
getContentType() {
|
||||
@ -402,6 +402,7 @@ export class DocumentDetailComponent
|
||||
}
|
||||
|
||||
updateComponent(doc: PaperlessDocument) {
|
||||
console.log('[updateComponent] doc:', doc);
|
||||
this.document = doc
|
||||
this.requiresPassword = false
|
||||
this.documentsService
|
||||
|
@ -37,7 +37,7 @@ from .parsers import ParseError
|
||||
from .signals import document_consumption_finished
|
||||
from .signals import document_consumption_started
|
||||
|
||||
from google.cloud import storage
|
||||
from google_cloud_storage.storage import upload_file, is_gcs_enabled
|
||||
|
||||
|
||||
class ConsumerError(Exception):
|
||||
@ -433,16 +433,6 @@ class Consumer(LoggingMixin):
|
||||
|
||||
classifier = load_classifier()
|
||||
|
||||
try:
|
||||
self.log("debug", "Initializing Google Cloud Storage: " + str(settings.GCP_SERVICE_ACCOUNT_JSON))
|
||||
# Prepare Google Cloud Storage client
|
||||
# client = storage.Client()
|
||||
client = storage.Client.from_service_account_info(settings.GCP_SERVICE_ACCOUNT_JSON)
|
||||
self.log("debug", "Getting bucket: " + settings.GCP_BUCKET_NAME)
|
||||
self.bucket = client.bucket(settings.GCP_BUCKET_NAME)
|
||||
except Exception as e:
|
||||
self.log("warning", 'Failed to initialize GCP: ' + str(e))
|
||||
|
||||
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
|
||||
# now that everything is done, we can start to store the document
|
||||
# in the system. This will be a transaction and reasonably fast.
|
||||
@ -635,18 +625,12 @@ class Consumer(LoggingMixin):
|
||||
|
||||
|
||||
def _write(self, storage_type, source, target):
|
||||
with open(source, "rb") as read_file, open(target, "wb") as write_file:
|
||||
write_file.write(read_file.read())
|
||||
if is_gcs_enabled:
|
||||
upload_file(source, target)
|
||||
else:
|
||||
with open(source, "rb") as read_file, open(target, "wb") as write_file:
|
||||
write_file.write(read_file.read())
|
||||
|
||||
with open(source, "rb") as read_file_2:
|
||||
self.log("debug", "GOOGLE_CLOUD_STORAGE:" + str(settings.GOOGLE_CLOUD_STORAGE))
|
||||
# Reference: https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
|
||||
if settings.GOOGLE_CLOUD_STORAGE:
|
||||
self.log("debug", "Uploading to Google Cloud Storage")
|
||||
# GCP was initialized earlier
|
||||
blob = self.bucket.blob(str(target))
|
||||
# Reference: https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
|
||||
blob.upload_from_file(read_file_2)
|
||||
|
||||
def _log_script_outputs(self, completed_process: CompletedProcess):
|
||||
"""
|
||||
|
@ -19,6 +19,8 @@ from django.utils import timezone
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
from documents.parsers import get_default_file_extension
|
||||
|
||||
from google_cloud_storage.storage import is_gcs_enabled, get_file_from_gcs
|
||||
|
||||
ALL_STATES = sorted(states.ALL_STATES)
|
||||
TASK_STATE_CHOICES = sorted(zip(ALL_STATES, ALL_STATES))
|
||||
|
||||
@ -312,6 +314,9 @@ class Document(ModelWithOwner):
|
||||
|
||||
@property
def source_file(self):
    """Return a readable binary file object for ``self.source_path``.

    When ``is_gcs_enabled`` is truthy the content is fetched through
    ``get_file_from_gcs``; otherwise the path is opened from local disk.
    The caller owns the returned object and is responsible for closing it.
    """
    if not is_gcs_enabled:
        return open(self.source_path, "rb")
    return get_file_from_gcs(self.source_path)
|
||||
|
||||
@property
|
||||
@ -327,6 +332,9 @@ class Document(ModelWithOwner):
|
||||
|
||||
@property
def archive_file(self):
    """Return a readable binary file object for ``self.archive_path``.

    When ``is_gcs_enabled`` is truthy the content is fetched through
    ``get_file_from_gcs``; otherwise the path is opened from local disk.
    The caller owns the returned object and is responsible for closing it.
    """
    if not is_gcs_enabled:
        return open(self.archive_path, "rb")
    return get_file_from_gcs(self.archive_path)
|
||||
|
||||
def get_public_filename(self, archive=False, counter=0, suffix=None) -> str:
|
||||
@ -364,6 +372,9 @@ class Document(ModelWithOwner):
|
||||
|
||||
@property
def thumbnail_file(self):
    """Return a readable binary file object for ``self.thumbnail_path``.

    When ``is_gcs_enabled`` is truthy the content is fetched through
    ``get_file_from_gcs``; otherwise the path is opened from local disk.
    The caller owns the returned object and is responsible for closing it.
    """
    if not is_gcs_enabled:
        return open(self.thumbnail_path, "rb")
    return get_file_from_gcs(self.thumbnail_path)
|
||||
|
||||
@property
|
||||
|
49
src/google_cloud_storage/storage.py
Normal file
49
src/google_cloud_storage/storage.py
Normal file
@ -0,0 +1,49 @@
|
||||
|
||||
import logging
from io import BytesIO

from django.conf import settings
from google.cloud import storage
|
||||
|
||||
# Feature flag read from Django settings; a truthy value triggers the client
# setup below at import time.
is_gcs_enabled = settings.GOOGLE_CLOUD_STORAGE
# Shared handles used by the helpers in this module. Both stay None while
# the feature flag is off.
client = None
bucket = None

if is_gcs_enabled:
    _logger = logging.getLogger(__name__)

    # Deliberately do NOT log settings.GCP_SERVICE_ACCOUNT_JSON: it is the
    # credential payload handed to the client and must not end up in logs.
    _logger.info("Initializing Google Cloud Storage client")
    client = storage.Client.from_service_account_info(
        settings.GCP_SERVICE_ACCOUNT_JSON
    )

    # %s formatting avoids a TypeError if the bucket name is not a str.
    _logger.info("Getting bucket: %s", settings.GCP_BUCKET_NAME)
    bucket = client.bucket(settings.GCP_BUCKET_NAME)
|
||||
|
||||
|
||||
def upload_file(source, target):
    """Upload the local file ``source`` to the bucket as the blob ``target``.

    Args:
        source: Path of the local file; it is opened in binary read mode.
        target: Name of the destination blob. It is passed through ``str()``,
            so path-like objects are accepted.

    Returns:
        None. When the module-level ``client`` or ``bucket`` is not set the
        upload is skipped; a warning is logged so the skip is not silent.
    """
    logger = logging.getLogger(__name__)

    if (not client) or (not bucket):
        # Kept as a no-op for backward compatibility, but made visible.
        logger.warning(
            "Google Cloud Storage is not initialized; skipping upload of %s",
            source,
        )
        return

    # Reference: https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
    with open(source, "rb") as local_file:
        logger.debug("Uploading %s to Google Cloud Storage as %s", source, target)
        blob = bucket.blob(str(target))
        # Reference: https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
        blob.upload_from_file(local_file)
|
||||
|
||||
def get_file_from_gcs(bucket_path):
    """Download a blob from the bucket and return it as an in-memory stream.

    Args:
        bucket_path: Name of the blob inside the bucket. It is passed through
            ``str()``, so path-like objects are accepted.

    Returns:
        io.BytesIO: The complete blob content, positioned at offset 0.

    Raises:
        RuntimeError: If the module-level ``client`` or ``bucket`` is not set.
            ``RuntimeError`` subclasses ``Exception``, so callers that catch
            ``Exception`` keep working.
    """
    if (not client) or (not bucket):
        raise RuntimeError("Google Cloud Storage is not initialized.")

    # Blob object representing the path in the bucket.
    blob = bucket.blob(str(bucket_path))

    # The whole blob is buffered in memory before being handed to the caller.
    byte_stream = BytesIO()
    blob.download_to_file(byte_stream)

    # Rewind so the caller reads from the beginning.
    byte_stream.seek(0)
    return byte_stream
|
Loading…
x
Reference in New Issue
Block a user