Saves work on an export/import process which is data only
This commit is contained in:
parent
04f52f553a
commit
c3d7793e3c
@ -185,6 +185,13 @@ For PostgreSQL, refer to [Upgrading a PostgreSQL Cluster](https://www.postgresql
|
|||||||
|
|
||||||
For MariaDB, refer to [Upgrading MariaDB](https://mariadb.com/kb/en/upgrading/)
|
For MariaDB, refer to [Upgrading MariaDB](https://mariadb.com/kb/en/upgrading/)
|
||||||
|
|
||||||
|
You may also use the exporter and importer with the `--data-only` flag.
|
||||||
|
|
||||||
|
!!! warning
|
||||||
|
|
||||||
|
You should not change any settings, especially paths, when doing this or there is a
|
||||||
|
risk of data loss.
|
||||||
|
|
||||||
## Downgrading Paperless {#downgrade-paperless}
|
## Downgrading Paperless {#downgrade-paperless}
|
||||||
|
|
||||||
Downgrades are possible. However, some updates also contain database
|
Downgrades are possible. However, some updates also contain database
|
||||||
@ -269,6 +276,7 @@ optional arguments:
|
|||||||
-sm, --split-manifest
|
-sm, --split-manifest
|
||||||
-z, --zip
|
-z, --zip
|
||||||
-zn, --zip-name
|
-zn, --zip-name
|
||||||
|
--data-only
|
||||||
```
|
```
|
||||||
|
|
||||||
`target` is a folder to which the data gets written. This includes
|
`target` is a folder to which the data gets written. This includes
|
||||||
@ -327,6 +335,9 @@ If `-z` or `--zip` is provided, the export will be a zip file
|
|||||||
in the target directory, named according to the current local date or the
|
in the target directory, named according to the current local date or the
|
||||||
value set in `-zn` or `--zip-name`.
|
value set in `-zn` or `--zip-name`.
|
||||||
|
|
||||||
|
If `--data-only` is provided, only the database will be exported. This option is intended
|
||||||
|
to facilitate database upgrades without needing to clean documents and thumbnails.
|
||||||
|
|
||||||
!!! warning
|
!!! warning
|
||||||
|
|
||||||
If exporting with the file name format, there may be errors due to
|
If exporting with the file name format, there may be errors due to
|
||||||
@ -345,6 +356,11 @@ and the script does the rest of the work:
|
|||||||
document_importer source
|
document_importer source
|
||||||
```
|
```
|
||||||
|
|
||||||
|
| Option | Required | Default | Description |
|
||||||
|
| ----------- | -------- | ------- | ------------------------------------------------------------------------- |
|
||||||
|
| source | Yes | N/A | The directory containing an export |
|
||||||
|
| --data-only | No | False | If provided, only import data, do not import document files or thumbnails |
|
||||||
|
|
||||||
When you use the provided docker compose script, put the export inside
|
When you use the provided docker compose script, put the export inside
|
||||||
the `export` folder in your paperless source directory. Specify
|
the `export` folder in your paperless source directory. Specify
|
||||||
`../export` as the `source`.
|
`../export` as the `source`.
|
||||||
|
@ -5,7 +5,6 @@ import shutil
|
|||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import tqdm
|
import tqdm
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@ -147,6 +146,13 @@ class Command(BaseCommand):
|
|||||||
help="Sets the export zip file name",
|
help="Sets the export zip file name",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--data-only",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help="If set, only the database will be exported, not files",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
default=False,
|
default=False,
|
||||||
@ -166,6 +172,7 @@ class Command(BaseCommand):
|
|||||||
self.delete = False
|
self.delete = False
|
||||||
self.no_archive = False
|
self.no_archive = False
|
||||||
self.no_thumbnail = False
|
self.no_thumbnail = False
|
||||||
|
self.data_only = False
|
||||||
|
|
||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
self.target = Path(options["target"]).resolve()
|
self.target = Path(options["target"]).resolve()
|
||||||
@ -177,14 +184,14 @@ class Command(BaseCommand):
|
|||||||
self.no_archive: bool = options["no_archive"]
|
self.no_archive: bool = options["no_archive"]
|
||||||
self.no_thumbnail: bool = options["no_thumbnail"]
|
self.no_thumbnail: bool = options["no_thumbnail"]
|
||||||
self.zip_export: bool = options["zip"]
|
self.zip_export: bool = options["zip"]
|
||||||
|
self.data_only: bool = options["data_only"]
|
||||||
|
self.no_progress_bar: bool = options["no_progress_bar"]
|
||||||
|
|
||||||
# If zipping, save the original target for later and
|
# If zipping, save the original target for later and
|
||||||
# get a temporary directory for the target instead
|
# get a temporary directory for the target instead
|
||||||
temp_dir = None
|
temp_dir = None
|
||||||
self.original_target: Optional[Path] = None
|
|
||||||
if self.zip_export:
|
|
||||||
self.original_target = self.target
|
self.original_target = self.target
|
||||||
|
if self.zip_export:
|
||||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
temp_dir = tempfile.TemporaryDirectory(
|
temp_dir = tempfile.TemporaryDirectory(
|
||||||
dir=settings.SCRATCH_DIR,
|
dir=settings.SCRATCH_DIR,
|
||||||
@ -203,7 +210,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
with FileLock(settings.MEDIA_LOCK):
|
with FileLock(settings.MEDIA_LOCK):
|
||||||
self.dump(options["no_progress_bar"])
|
self.dump()
|
||||||
|
|
||||||
# We've written everything to the temporary directory in this case,
|
# We've written everything to the temporary directory in this case,
|
||||||
# now make an archive in the original target, with all files stored
|
# now make an archive in the original target, with all files stored
|
||||||
@ -222,7 +229,7 @@ class Command(BaseCommand):
|
|||||||
if self.zip_export and temp_dir is not None:
|
if self.zip_export and temp_dir is not None:
|
||||||
temp_dir.cleanup()
|
temp_dir.cleanup()
|
||||||
|
|
||||||
def dump(self, progress_bar_disable=False):
|
def dump(self):
|
||||||
# 1. Take a snapshot of what files exist in the current export folder
|
# 1. Take a snapshot of what files exist in the current export folder
|
||||||
for x in self.target.glob("**/*"):
|
for x in self.target.glob("**/*"):
|
||||||
if x.is_file():
|
if x.is_file():
|
||||||
@ -334,11 +341,15 @@ class Command(BaseCommand):
|
|||||||
manifest += notes
|
manifest += notes
|
||||||
manifest += custom_field_instances
|
manifest += custom_field_instances
|
||||||
|
|
||||||
|
if self.data_only:
|
||||||
|
self.stdout.write(self.style.NOTICE("Data only export completed"))
|
||||||
|
return
|
||||||
|
|
||||||
# 3. Export files from each document
|
# 3. Export files from each document
|
||||||
for index, document_dict in tqdm.tqdm(
|
for index, document_dict in tqdm.tqdm(
|
||||||
enumerate(document_manifest),
|
enumerate(document_manifest),
|
||||||
total=len(document_manifest),
|
total=len(document_manifest),
|
||||||
disable=progress_bar_disable,
|
disable=self.no_progress_bar,
|
||||||
):
|
):
|
||||||
# 3.1. store files unencrypted
|
# 3.1. store files unencrypted
|
||||||
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
|
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
|
||||||
|
@ -57,6 +57,7 @@ class Command(BaseCommand):
|
|||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("source")
|
parser.add_argument("source")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
default=False,
|
default=False,
|
||||||
@ -64,6 +65,13 @@ class Command(BaseCommand):
|
|||||||
help="If set, the progress bar will not be shown",
|
help="If set, the progress bar will not be shown",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--data-only",
|
||||||
|
default=False,
|
||||||
|
action="store_true",
|
||||||
|
help="If set, only the database will be exported, not files",
|
||||||
|
)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
BaseCommand.__init__(self, *args, **kwargs)
|
BaseCommand.__init__(self, *args, **kwargs)
|
||||||
self.source = None
|
self.source = None
|
||||||
@ -82,6 +90,10 @@ class Command(BaseCommand):
|
|||||||
if not os.access(self.source, os.R_OK):
|
if not os.access(self.source, os.R_OK):
|
||||||
raise CommandError("That path doesn't appear to be readable")
|
raise CommandError("That path doesn't appear to be readable")
|
||||||
|
|
||||||
|
# Skip this check if operating only on the database
|
||||||
|
# We can expect data to exist in that case
|
||||||
|
if not self.data_only:
|
||||||
|
|
||||||
for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
|
for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
|
||||||
if document_dir.exists() and document_dir.is_dir():
|
if document_dir.exists() and document_dir.is_dir():
|
||||||
for entry in document_dir.glob("**/*"):
|
for entry in document_dir.glob("**/*"):
|
||||||
@ -113,6 +125,8 @@ class Command(BaseCommand):
|
|||||||
logging.getLogger().handlers[0].level = logging.ERROR
|
logging.getLogger().handlers[0].level = logging.ERROR
|
||||||
|
|
||||||
self.source = Path(options["source"]).resolve()
|
self.source = Path(options["source"]).resolve()
|
||||||
|
self.data_only: bool = options["data_only"]
|
||||||
|
self.no_progress_bar: bool = options["no_progress_bar"]
|
||||||
|
|
||||||
self.pre_check()
|
self.pre_check()
|
||||||
|
|
||||||
@ -200,8 +214,13 @@ class Command(BaseCommand):
|
|||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
if not self.data_only:
|
||||||
self._import_files_from_manifest(options["no_progress_bar"])
|
self._import_files_from_manifest(options["no_progress_bar"])
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||||
|
|
||||||
self.stdout.write("Updating search index...")
|
self.stdout.write("Updating search index...")
|
||||||
call_command(
|
call_command(
|
||||||
"document_index",
|
"document_index",
|
||||||
|
@ -37,10 +37,16 @@ from documents.sanity_checker import check_sanity
|
|||||||
from documents.settings import EXPORTER_FILE_NAME
|
from documents.settings import EXPORTER_FILE_NAME
|
||||||
from documents.tests.utils import DirectoriesMixin
|
from documents.tests.utils import DirectoriesMixin
|
||||||
from documents.tests.utils import FileSystemAssertsMixin
|
from documents.tests.utils import FileSystemAssertsMixin
|
||||||
|
from documents.tests.utils import SampleDirMixin
|
||||||
from documents.tests.utils import paperless_environment
|
from documents.tests.utils import paperless_environment
|
||||||
|
|
||||||
|
|
||||||
class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
class TestExportImport(
|
||||||
|
DirectoriesMixin,
|
||||||
|
FileSystemAssertsMixin,
|
||||||
|
SampleDirMixin,
|
||||||
|
TestCase,
|
||||||
|
):
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
self.target = Path(tempfile.mkdtemp())
|
self.target = Path(tempfile.mkdtemp())
|
||||||
self.addCleanup(shutil.rmtree, self.target)
|
self.addCleanup(shutil.rmtree, self.target)
|
||||||
@ -139,6 +145,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
@override_settings(PASSPHRASE="test")
|
@override_settings(PASSPHRASE="test")
|
||||||
def _do_export(
|
def _do_export(
|
||||||
self,
|
self,
|
||||||
|
*,
|
||||||
use_filename_format=False,
|
use_filename_format=False,
|
||||||
compare_checksums=False,
|
compare_checksums=False,
|
||||||
delete=False,
|
delete=False,
|
||||||
@ -146,6 +153,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
no_thumbnail=False,
|
no_thumbnail=False,
|
||||||
split_manifest=False,
|
split_manifest=False,
|
||||||
use_folder_prefix=False,
|
use_folder_prefix=False,
|
||||||
|
data_only=False,
|
||||||
):
|
):
|
||||||
args = ["document_exporter", self.target]
|
args = ["document_exporter", self.target]
|
||||||
if use_filename_format:
|
if use_filename_format:
|
||||||
@ -162,6 +170,8 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
args += ["--split-manifest"]
|
args += ["--split-manifest"]
|
||||||
if use_folder_prefix:
|
if use_folder_prefix:
|
||||||
args += ["--use-folder-prefix"]
|
args += ["--use-folder-prefix"]
|
||||||
|
if data_only:
|
||||||
|
args += ["--data-only"]
|
||||||
|
|
||||||
call_command(*args)
|
call_command(*args)
|
||||||
|
|
||||||
@ -794,3 +804,25 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
|||||||
manifest = self._do_export(use_filename_format=True)
|
manifest = self._do_export(use_filename_format=True)
|
||||||
for obj in manifest:
|
for obj in manifest:
|
||||||
self.assertNotEqual(obj["model"], "auditlog.logentry")
|
self.assertNotEqual(obj["model"], "auditlog.logentry")
|
||||||
|
|
||||||
|
def test_export_data_only(self):
|
||||||
|
"""
|
||||||
|
GIVEN:
|
||||||
|
- Request to export documents with data only
|
||||||
|
WHEN:
|
||||||
|
- Export command is called
|
||||||
|
THEN:
|
||||||
|
- No document files are exported
|
||||||
|
- Manifest and version are exported
|
||||||
|
"""
|
||||||
|
|
||||||
|
shutil.rmtree(self.dirs.media_dir / "documents")
|
||||||
|
shutil.copytree(
|
||||||
|
self.SAMPLE_DIR / "documents",
|
||||||
|
self.dirs.media_dir / "documents",
|
||||||
|
)
|
||||||
|
|
||||||
|
_ = self._do_export(data_only=True)
|
||||||
|
|
||||||
|
# Manifest and version files only should be present in the exported directory
|
||||||
|
self.assertFileCountInDir(self.target, 2)
|
||||||
|
@ -200,6 +200,16 @@ class FileSystemAssertsMixin:
|
|||||||
|
|
||||||
self.assertEqual(hash1, hash2, "File SHA256 mismatch")
|
self.assertEqual(hash1, hash2, "File SHA256 mismatch")
|
||||||
|
|
||||||
|
def assertFileCountInDir(self, path: Union[PathLike, str], count: int):
|
||||||
|
path = Path(path).resolve()
|
||||||
|
self.assertTrue(path.is_dir(), f"Path {path} is not a directory")
|
||||||
|
file_count = len([x for x in path.iterdir() if x.is_file()])
|
||||||
|
self.assertEqual(
|
||||||
|
file_count,
|
||||||
|
count,
|
||||||
|
f"Path {path} contains {file_count} files instead of {count} file",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ConsumerProgressMixin:
|
class ConsumerProgressMixin:
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user