feat: adding google cloud storage uploading
This commit is contained in:
parent
e8a849e6c1
commit
81ce4e4597
@ -198,6 +198,8 @@ RUN set -eux \
|
|||||||
&& echo "Installing psycopg2" \
|
&& echo "Installing psycopg2" \
|
||||||
&& python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
|
&& python3 -m pip install --no-cache-dir ./psycopg2/${PSYCOPG2_VERSION}/${TARGETARCH}${TARGETVARIANT}/psycopg2*.whl \
|
||||||
&& python3 -m pip list \
|
&& python3 -m pip list \
|
||||||
|
&& echo "Installing google cloud storage" \
|
||||||
|
&& python3 -m pip install google-cloud-storage \
|
||||||
&& echo "Cleaning up image layer" \
|
&& echo "Cleaning up image layer" \
|
||||||
&& cd ../ \
|
&& cd ../ \
|
||||||
&& rm -rf paperless-ngx \
|
&& rm -rf paperless-ngx \
|
||||||
|
205
Pipfile
205
Pipfile
@ -1,101 +1,104 @@
|
|||||||
[[source]]
|
[[source]]
|
||||||
url = "https://pypi.python.org/simple"
|
url = "https://pypi.python.org/simple"
|
||||||
verify_ssl = true
|
verify_ssl = true
|
||||||
name = "pypi"
|
name = "pypi"
|
||||||
|
|
||||||
[[source]]
|
[[source]]
|
||||||
url = "https://www.piwheels.org/simple"
|
url = "https://www.piwheels.org/simple"
|
||||||
verify_ssl = true
|
verify_ssl = true
|
||||||
name = "piwheels"
|
name = "piwheels"
|
||||||
|
|
||||||
[packages]
|
[packages]
|
||||||
dateparser = "~=1.1"
|
dateparser = "~=1.1"
|
||||||
django = "~=4.1"
|
django = "~=4.1"
|
||||||
django-cors-headers = "*"
|
django-cors-headers = "*"
|
||||||
django-celery-results = "*"
|
django-celery-results = "*"
|
||||||
django-compression-middleware = "*"
|
django-compression-middleware = "*"
|
||||||
django-guardian = "*"
|
django-guardian = "*"
|
||||||
django-extensions = "*"
|
django-extensions = "*"
|
||||||
django-filter = "~=22.1"
|
django-filter = "~=22.1"
|
||||||
djangorestframework = "~=3.14"
|
djangorestframework = "~=3.14"
|
||||||
djangorestframework-guardian = "*"
|
djangorestframework-guardian = "*"
|
||||||
django-ipware = "*"
|
django-ipware = "*"
|
||||||
filelock = "*"
|
filelock = "*"
|
||||||
gunicorn = "*"
|
gunicorn = "*"
|
||||||
imap-tools = "*"
|
imap-tools = "*"
|
||||||
langdetect = "*"
|
langdetect = "*"
|
||||||
pathvalidate = "*"
|
pathvalidate = "*"
|
||||||
pillow = "~=9.4"
|
pillow = "~=9.4"
|
||||||
pikepdf = "*"
|
pikepdf = "*"
|
||||||
python-gnupg = "*"
|
python-gnupg = "*"
|
||||||
python-dotenv = "*"
|
python-dotenv = "*"
|
||||||
python-dateutil = "*"
|
python-dateutil = "*"
|
||||||
python-magic = "*"
|
python-magic = "*"
|
||||||
psycopg2 = "*"
|
psycopg2 = "*"
|
||||||
rapidfuzz = "*"
|
rapidfuzz = "*"
|
||||||
redis = {extras = ["hiredis"], version = "*"}
|
redis = {extras = ["hiredis"], version = "*"}
|
||||||
scikit-learn = "~=1.2"
|
scikit-learn = "~=1.2"
|
||||||
numpy = "*"
|
numpy = "*"
|
||||||
whitenoise = "~=6.3"
|
whitenoise = "~=6.3"
|
||||||
watchdog = "~=2.2"
|
watchdog = "~=2.2"
|
||||||
whoosh="~=2.7"
|
whoosh="~=2.7"
|
||||||
inotifyrecursive = "~=0.3"
|
inotifyrecursive = "~=0.3"
|
||||||
ocrmypdf = "~=14.0"
|
ocrmypdf = "~=14.0"
|
||||||
tqdm = "*"
|
tqdm = "*"
|
||||||
tika = "*"
|
tika = "*"
|
||||||
# TODO: This will sadly also install daphne+dependencies,
|
# TODO: This will sadly also install daphne+dependencies,
|
||||||
# which is an ASGI server we don't need. Adds about 15MB image size.
|
# which is an ASGI server we don't need. Adds about 15MB image size.
|
||||||
channels = "~=3.0"
|
channels = "~=3.0"
|
||||||
channels-redis = "*"
|
channels-redis = "*"
|
||||||
uvicorn = {extras = ["standard"], version = "*"}
|
uvicorn = {extras = ["standard"], version = "*"}
|
||||||
concurrent-log-handler = "*"
|
concurrent-log-handler = "*"
|
||||||
"pdfminer.six" = "*"
|
"pdfminer.six" = "*"
|
||||||
pyzbar = "*"
|
pyzbar = "*"
|
||||||
mysqlclient = "*"
|
celery = {extras = ["redis"], version = "*"}
|
||||||
celery = {extras = ["redis"], version = "*"}
|
setproctitle = "*"
|
||||||
setproctitle = "*"
|
nltk = "*"
|
||||||
nltk = "*"
|
pdf2image = "*"
|
||||||
pdf2image = "*"
|
flower = "*"
|
||||||
flower = "*"
|
bleach = "*"
|
||||||
bleach = "*"
|
zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
|
||||||
zxing-cpp = {version = "*", platform_machine = "== 'x86_64'"}
|
#
|
||||||
#
|
# Packages locked due to issues (try to check if these are fixed in a release every so often)
|
||||||
# Packages locked due to issues (try to check if these are fixed in a release every so often)
|
#
|
||||||
#
|
# Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)
|
||||||
# Pin this until piwheels is building 1.9 (see https://www.piwheels.org/project/scipy/)
|
scipy = "==1.8.1"
|
||||||
scipy = "==1.8.1"
|
google-cloud-storage = "*"
|
||||||
|
google = "*"
|
||||||
[dev-packages]
|
google-api-core = "*"
|
||||||
coveralls = "*"
|
google-cloud-core = "*"
|
||||||
factory-boy = "*"
|
|
||||||
pytest = "*"
|
[dev-packages]
|
||||||
pytest-cov = "*"
|
coveralls = "*"
|
||||||
pytest-django = "*"
|
factory-boy = "*"
|
||||||
pytest-env = "*"
|
pytest = "*"
|
||||||
pytest-sugar = "*"
|
pytest-cov = "*"
|
||||||
pytest-xdist = "*"
|
pytest-django = "*"
|
||||||
black = "*"
|
pytest-env = "*"
|
||||||
pre-commit = "*"
|
pytest-sugar = "*"
|
||||||
imagehash = "*"
|
pytest-xdist = "*"
|
||||||
mkdocs-material = "*"
|
black = "*"
|
||||||
ruff = "*"
|
pre-commit = "*"
|
||||||
|
imagehash = "*"
|
||||||
[typing-dev]
|
mkdocs-material = "*"
|
||||||
mypy = "*"
|
ruff = "*"
|
||||||
types-Pillow = "*"
|
|
||||||
django-filter-stubs = "*"
|
[typing-dev]
|
||||||
types-python-dateutil = "*"
|
mypy = "*"
|
||||||
djangorestframework-stubs = {extras= ["compatible-mypy"], version="*"}
|
types-Pillow = "*"
|
||||||
celery-types = "*"
|
django-filter-stubs = "*"
|
||||||
django-stubs = {extras= ["compatible-mypy"], version="*"}
|
types-python-dateutil = "*"
|
||||||
types-dateparser = "*"
|
djangorestframework-stubs = {extras= ["compatible-mypy"], version="*"}
|
||||||
types-bleach = "*"
|
celery-types = "*"
|
||||||
types-humanfriendly = "*"
|
django-stubs = {extras= ["compatible-mypy"], version="*"}
|
||||||
types-redis = "*"
|
types-dateparser = "*"
|
||||||
types-tqdm = "*"
|
types-bleach = "*"
|
||||||
types-Markdown = "*"
|
types-humanfriendly = "*"
|
||||||
types-Pygments = "*"
|
types-redis = "*"
|
||||||
types-backports = "*"
|
types-tqdm = "*"
|
||||||
types-colorama = "*"
|
types-Markdown = "*"
|
||||||
types-psycopg2 = "*"
|
types-Pygments = "*"
|
||||||
types-setuptools = "*"
|
types-backports = "*"
|
||||||
|
types-colorama = "*"
|
||||||
|
types-psycopg2 = "*"
|
||||||
|
types-setuptools = "*"
|
||||||
|
@ -37,6 +37,8 @@ from .parsers import ParseError
|
|||||||
from .signals import document_consumption_finished
|
from .signals import document_consumption_finished
|
||||||
from .signals import document_consumption_started
|
from .signals import document_consumption_started
|
||||||
|
|
||||||
|
from google.cloud import storage
|
||||||
|
|
||||||
|
|
||||||
class ConsumerError(Exception):
|
class ConsumerError(Exception):
|
||||||
pass
|
pass
|
||||||
@ -431,6 +433,16 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.log("debug", "Initializing Google Cloud Storage: " + str(settings.GCP_SERVICE_ACCOUNT_JSON))
|
||||||
|
# Prepare Google Cloud Storage client
|
||||||
|
# client = storage.Client()
|
||||||
|
client = storage.Client.from_service_account_info(settings.GCP_SERVICE_ACCOUNT_JSON)
|
||||||
|
self.log("debug", "Getting bucket: " + settings.GCP_BUCKET_NAME)
|
||||||
|
self.bucket = client.bucket(settings.GCP_BUCKET_NAME)
|
||||||
|
except Exception as e:
|
||||||
|
self.log("warning", 'Failed to initialize GCP: ' + str(e))
|
||||||
|
|
||||||
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
|
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
|
||||||
# now that everything is done, we can start to store the document
|
# now that everything is done, we can start to store the document
|
||||||
# in the system. This will be a transaction and reasonably fast.
|
# in the system. This will be a transaction and reasonably fast.
|
||||||
@ -487,7 +499,7 @@ class Consumer(LoggingMixin):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Delete the file only if it was successfully consumed
|
# Delete the file only if it was successfully consumed
|
||||||
self.log("debug", f"Deleting file {self.path}")
|
self.log("debug", f"Deleting file 123 {self.path}")
|
||||||
os.unlink(self.path)
|
os.unlink(self.path)
|
||||||
self.original_path.unlink()
|
self.original_path.unlink()
|
||||||
|
|
||||||
@ -625,6 +637,16 @@ class Consumer(LoggingMixin):
|
|||||||
def _write(self, storage_type, source, target):
|
def _write(self, storage_type, source, target):
|
||||||
with open(source, "rb") as read_file, open(target, "wb") as write_file:
|
with open(source, "rb") as read_file, open(target, "wb") as write_file:
|
||||||
write_file.write(read_file.read())
|
write_file.write(read_file.read())
|
||||||
|
|
||||||
|
with open(source, "rb") as read_file_2:
|
||||||
|
self.log("debug", "GOOGLE_CLOUD_STORAGE:" + str(settings.GOOGLE_CLOUD_STORAGE))
|
||||||
|
# Reference: https://github.com/GoogleCloudPlatform/getting-started-python/blob/main/bookshelf/storage.py#L59
|
||||||
|
if settings.GOOGLE_CLOUD_STORAGE:
|
||||||
|
self.log("debug", "Uploading to Google Cloud Storage")
|
||||||
|
# GCP was initialized earlier
|
||||||
|
blob = self.bucket.blob(str(target))
|
||||||
|
# Reference: https://cloud.google.com/python/docs/reference/storage/latest/google.cloud.storage.blob.Blob#google_cloud_storage_blob_Blob_upload_from_file
|
||||||
|
blob.upload_from_file(read_file_2)
|
||||||
|
|
||||||
def _log_script_outputs(self, completed_process: CompletedProcess):
|
def _log_script_outputs(self, completed_process: CompletedProcess):
|
||||||
"""
|
"""
|
||||||
|
@ -217,6 +217,12 @@ def _parse_beat_schedule() -> Dict:
|
|||||||
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
||||||
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
DEBUG = __get_boolean("PAPERLESS_DEBUG", "NO")
|
||||||
|
|
||||||
|
# Google Cloud Storage
|
||||||
|
GOOGLE_CLOUD_STORAGE = __get_boolean("GOOGLE_CLOUD_STORAGE", "NO")
|
||||||
|
GCP_BUCKET_NAME = os.getenv('GCP_BUCKET_NAME', 'dms_files_local')
|
||||||
|
GCP_SERVICE_ACCOUNT_JSON = os.getenv('GCP_SERVICE_ACCOUNT_JSON', '')
|
||||||
|
if GCP_SERVICE_ACCOUNT_JSON != '':
|
||||||
|
GCP_SERVICE_ACCOUNT_JSON = json.loads(GCP_SERVICE_ACCOUNT_JSON)
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Directories #
|
# Directories #
|
||||||
|
Loading…
x
Reference in New Issue
Block a user