feat: add Google Cloud Storage uploading

2023-09-24 01:45:27 +08:00 · 2023-09-24 01:45:27 +08:00 · 81ce4e4597
commit 81ce4e4597
parent e8a849e6c1
4 changed files with 135 additions and 102 deletions
--- a/2
+++ b/2
@ -198,6 +198,8 @@ RUN set -eux \
  && echo "Installing psycopg2" \
    && python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
    && python3 -m pip list \
  && echo "Installing google cloud storage" \
    && python3 -m pip install --no-cache-dir google-cloud-storage \
  && echo "Cleaning up image layer" \
    && cd ../ \
    && rm -rf paperless-ngx \
--- a/5
+++ b/5
@ -51,7 +51,6 @@ uvicorn = {extras = ["standard"], version = "*"}
 concurrent-log-handler = "*"
 "pdfminer.six" = "*"
 pyzbar = "*"
 mysqlclient = "*"
 celery = {extras = ["redis"], version = "*"}
 setproctitle = "*"
 nltk = "*"
@ -64,6 +63,10 @@ zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
 #
 # Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)
 scipy = "==1.8.1"
 google-cloud-storage = "*"
 google = "*"
 google-api-core = "*"
 google-cloud-core = "*"
 [dev-packages]
 coveralls = "*"
--- a/src/documents/consumer.py
+++ b/src/documents/consumer.py
@ -37,6 +37,8 @@ from .parsers import ParseError
 from .signals import document_consumption_finished
 from .signals import document_consumption_started
 from google.cloud import storage
 class ConsumerError(Exception):
    pass
@ -431,6 +433,16 @@ class Consumer(LoggingMixin):
        classifier = load_classifier()
        # Prepare the Google Cloud Storage bucket handle that the upload step
        # reads from self.bucket. Initialization is best-effort: a failure is
        # logged as a warning and consumption continues with self.bucket unset
        # (None). Nothing is initialized when the feature flag is off.
        self.bucket = None
        if settings.GOOGLE_CLOUD_STORAGE:
            try:
                # Never log settings.GCP_SERVICE_ACCOUNT_JSON itself: it is the
                # credential payload handed to the client.
                self.log("debug", "Initializing Google Cloud Storage")
                client = storage.Client.from_service_account_info(
                    settings.GCP_SERVICE_ACCOUNT_JSON,
                )
                self.log("debug", "Getting bucket: " + settings.GCP_BUCKET_NAME)
                self.bucket = client.bucket(settings.GCP_BUCKET_NAME)
            except Exception as e:
                # Deliberately broad: any client/credential error must not
                # abort document consumption.
                self.log("warning", 'Failed to initialize GCP: ' + str(e))
        self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
@ -487,7 +499,7 @@ class Consumer(LoggingMixin):
                document.save()
                # Delete the file only if it was successfully consumed
-                self.log("debug", f"Deleting file {self.path}")
+                self.log("debug", f"Deleting file 123 {self.path}")
                os.unlink(self.path)
                self.original_path.unlink()
@ -626,6 +638,16 @@ class Consumer(LoggingMixin):
        with open(source, "rb") as read_file, open(target, "wb") as write_file:
            write_file.write(read_file.read())
        # Optionally mirror the file that was just copied to Google Cloud
        # Storage. The blob name is the local target path rendered as a string.
        self.log("debug", "GOOGLE_CLOUD_STORAGE:" + str(settings.GOOGLE_CLOUD_STORAGE))
        # Reference: https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
        if settings.GOOGLE_CLOUD_STORAGE:
            # The bucket is prepared earlier on a best-effort basis, so it may
            # be missing or None if that initialization failed.
            bucket = getattr(self, "bucket", None)
            if bucket is None:
                self.log(
                    "warning",
                    "Skipping Google Cloud Storage upload: bucket is not initialized",
                )
            else:
                self.log("debug", "Uploading to Google Cloud Storage")
                blob = bucket.blob(str(target))
                # Only open the source when an upload actually happens.
                # Reference: https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
                with open(source, "rb") as upload_file:
                    blob.upload_from_file(upload_file)
    def _log_script_outputs(self, completed_process: CompletedProcess):
        """
        Decodes a process stdout and stderr streams and logs them to the main log
--- a/src/paperless/settings.py
+++ b/src/paperless/settings.py
@ -217,6 +217,12 @@ def _parse_beat_schedule() -> Dict:
 # NEVER RUN WITH DEBUG IN PRODUCTION.
 DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
 # Google Cloud Storage
 # Feature flag read from the GOOGLE_CLOUD_STORAGE environment variable;
 # defaults to "NO" when the variable is not set.
 GOOGLE_CLOUD_STORAGE = __get_boolean("GOOGLE_CLOUD_STORAGE", "NO")
 # Name of the target bucket; falls back to 'dms_files_local' when unset.
 GCP_BUCKET_NAME = os.getenv('GCP_BUCKET_NAME', 'dms_files_local')
 # Service-account credentials supplied as a JSON document in the environment.
 # Stays an empty string when the variable is unset or empty; otherwise it is
 # replaced by the parsed JSON value below.
 # NOTE(review): this value is credential material - avoid logging it.
 GCP_SERVICE_ACCOUNT_JSON = os.getenv('GCP_SERVICE_ACCOUNT_JSON', '')
 if GCP_SERVICE_ACCOUNT_JSON != '':
    # json.loads raises on malformed input, which surfaces while this
    # settings module is being loaded.
    GCP_SERVICE_ACCOUNT_JSON = json.loads(GCP_SERVICE_ACCOUNT_JSON)
 ###############################################################################
 # Directories                                                                 #