Saves work on an export/import process which is data only
This commit is contained in:
parent
04f52f553a
commit
c3d7793e3c
@ -185,6 +185,13 @@ For PostgreSQL, refer to [Upgrading a PostgreSQL Cluster](https://www.postgresql
|
||||
|
||||
For MariaDB, refer to [Upgrading MariaDB](https://mariadb.com/kb/en/upgrading/)
|
||||
|
||||
You may also use the exporter and importer with the `--data-only` flag.
|
||||
|
||||
!!! warning
|
||||
|
||||
You should not change any settings, especially paths, when doing this or there is a
|
||||
risk of data loss.
|
||||
|
||||
## Downgrading Paperless {#downgrade-paperless}
|
||||
|
||||
Downgrades are possible. However, some updates also contain database
|
||||
@ -269,6 +276,7 @@ optional arguments:
|
||||
-sm, --split-manifest
|
||||
-z, --zip
|
||||
-zn, --zip-name
|
||||
--data-only
|
||||
```
|
||||
|
||||
`target` is a folder to which the data gets written. This includes
|
||||
@ -327,6 +335,9 @@ If `-z` or `--zip` is provided, the export will be a zip file
|
||||
in the target directory, named according to the current local date or the
|
||||
value set in `-zn` or `--zip-name`.
|
||||
|
||||
If `--data-only` is provided, only the database will be exported. This option is intended
|
||||
to facilitate database upgrades without needing to clean documents and thumbnails.
|
||||
|
||||
!!! warning
|
||||
|
||||
If exporting with the file name format, there may be errors due to
|
||||
@ -345,6 +356,11 @@ and the script does the rest of the work:
|
||||
document_importer source
|
||||
```
|
||||
|
||||
| Option | Required | Default | Description |
|
||||
| ----------- | -------- | ------- | ------------------------------------------------------------------------- |
|
||||
| source | Yes | N/A | The directory containing an export |
|
||||
| --data-only | No | False | If provided, only import data, do not import document files or thumbnails |
|
||||
|
||||
When you use the provided docker compose script, put the export inside
|
||||
the `export` folder in your paperless source directory. Specify
|
||||
`../export` as the `source`.
|
||||
|
@ -5,7 +5,6 @@ import shutil
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import tqdm
|
||||
from django.conf import settings
|
||||
@ -147,6 +146,13 @@ class Command(BaseCommand):
|
||||
help="Sets the export zip file name",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--data-only",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, only the database will be exported, not files",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
@ -166,6 +172,7 @@ class Command(BaseCommand):
|
||||
self.delete = False
|
||||
self.no_archive = False
|
||||
self.no_thumbnail = False
|
||||
self.data_only = False
|
||||
|
||||
def handle(self, *args, **options):
|
||||
self.target = Path(options["target"]).resolve()
|
||||
@ -177,14 +184,14 @@ class Command(BaseCommand):
|
||||
self.no_archive: bool = options["no_archive"]
|
||||
self.no_thumbnail: bool = options["no_thumbnail"]
|
||||
self.zip_export: bool = options["zip"]
|
||||
self.data_only: bool = options["data_only"]
|
||||
self.no_progress_bar: bool = options["no_progress_bar"]
|
||||
|
||||
# If zipping, save the original target for later and
|
||||
# get a temporary directory for the target instead
|
||||
temp_dir = None
|
||||
self.original_target: Optional[Path] = None
|
||||
self.original_target = self.target
|
||||
if self.zip_export:
|
||||
self.original_target = self.target
|
||||
|
||||
settings.SCRATCH_DIR.mkdir(parents=True, exist_ok=True)
|
||||
temp_dir = tempfile.TemporaryDirectory(
|
||||
dir=settings.SCRATCH_DIR,
|
||||
@ -203,7 +210,7 @@ class Command(BaseCommand):
|
||||
|
||||
try:
|
||||
with FileLock(settings.MEDIA_LOCK):
|
||||
self.dump(options["no_progress_bar"])
|
||||
self.dump()
|
||||
|
||||
# We've written everything to the temporary directory in this case,
|
||||
# now make an archive in the original target, with all files stored
|
||||
@ -222,7 +229,7 @@ class Command(BaseCommand):
|
||||
if self.zip_export and temp_dir is not None:
|
||||
temp_dir.cleanup()
|
||||
|
||||
def dump(self, progress_bar_disable=False):
|
||||
def dump(self):
|
||||
# 1. Take a snapshot of what files exist in the current export folder
|
||||
for x in self.target.glob("**/*"):
|
||||
if x.is_file():
|
||||
@ -334,11 +341,15 @@ class Command(BaseCommand):
|
||||
manifest += notes
|
||||
manifest += custom_field_instances
|
||||
|
||||
if self.data_only:
|
||||
self.stdout.write(self.style.NOTICE("Data only export completed"))
|
||||
return
|
||||
|
||||
# 3. Export files from each document
|
||||
for index, document_dict in tqdm.tqdm(
|
||||
enumerate(document_manifest),
|
||||
total=len(document_manifest),
|
||||
disable=progress_bar_disable,
|
||||
disable=self.no_progress_bar,
|
||||
):
|
||||
# 3.1. store files unencrypted
|
||||
document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED
|
||||
|
@ -57,6 +57,7 @@ class Command(BaseCommand):
|
||||
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument("source")
|
||||
|
||||
parser.add_argument(
|
||||
"--no-progress-bar",
|
||||
default=False,
|
||||
@ -64,6 +65,13 @@ class Command(BaseCommand):
|
||||
help="If set, the progress bar will not be shown",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--data-only",
|
||||
default=False,
|
||||
action="store_true",
|
||||
help="If set, only the database will be exported, not files",
|
||||
)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
BaseCommand.__init__(self, *args, **kwargs)
|
||||
self.source = None
|
||||
@ -82,17 +90,21 @@ class Command(BaseCommand):
|
||||
if not os.access(self.source, os.R_OK):
|
||||
raise CommandError("That path doesn't appear to be readable")
|
||||
|
||||
for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
|
||||
if document_dir.exists() and document_dir.is_dir():
|
||||
for entry in document_dir.glob("**/*"):
|
||||
if entry.is_dir():
|
||||
continue
|
||||
self.stdout.write(
|
||||
self.style.WARNING(
|
||||
f"Found file {entry.relative_to(document_dir)}, this might indicate a non-empty installation",
|
||||
),
|
||||
)
|
||||
break
|
||||
# Skip this check if operating only on the database
|
||||
# We can expect data to exist in that case
|
||||
if not self.data_only:
|
||||
|
||||
for document_dir in [settings.ORIGINALS_DIR, settings.ARCHIVE_DIR]:
|
||||
if document_dir.exists() and document_dir.is_dir():
|
||||
for entry in document_dir.glob("**/*"):
|
||||
if entry.is_dir():
|
||||
continue
|
||||
self.stdout.write(
|
||||
self.style.WARNING(
|
||||
f"Found file {entry.relative_to(document_dir)}, this might indicate a non-empty installation",
|
||||
),
|
||||
)
|
||||
break
|
||||
if (
|
||||
User.objects.exclude(username__in=["consumer", "AnonymousUser"]).count()
|
||||
!= 0
|
||||
@ -113,6 +125,8 @@ class Command(BaseCommand):
|
||||
logging.getLogger().handlers[0].level = logging.ERROR
|
||||
|
||||
self.source = Path(options["source"]).resolve()
|
||||
self.data_only: bool = options["data_only"]
|
||||
self.no_progress_bar: bool = options["no_progress_bar"]
|
||||
|
||||
self.pre_check()
|
||||
|
||||
@ -200,7 +214,12 @@ class Command(BaseCommand):
|
||||
)
|
||||
raise e
|
||||
|
||||
self._import_files_from_manifest(options["no_progress_bar"])
|
||||
if not self.data_only:
|
||||
self._import_files_from_manifest(options["no_progress_bar"])
|
||||
|
||||
else:
|
||||
|
||||
self.stdout.write(self.style.NOTICE("Data only import completed"))
|
||||
|
||||
self.stdout.write("Updating search index...")
|
||||
call_command(
|
||||
|
@ -37,10 +37,16 @@ from documents.sanity_checker import check_sanity
|
||||
from documents.settings import EXPORTER_FILE_NAME
|
||||
from documents.tests.utils import DirectoriesMixin
|
||||
from documents.tests.utils import FileSystemAssertsMixin
|
||||
from documents.tests.utils import SampleDirMixin
|
||||
from documents.tests.utils import paperless_environment
|
||||
|
||||
|
||||
class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
class TestExportImport(
|
||||
DirectoriesMixin,
|
||||
FileSystemAssertsMixin,
|
||||
SampleDirMixin,
|
||||
TestCase,
|
||||
):
|
||||
def setUp(self) -> None:
|
||||
self.target = Path(tempfile.mkdtemp())
|
||||
self.addCleanup(shutil.rmtree, self.target)
|
||||
@ -139,6 +145,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
@override_settings(PASSPHRASE="test")
|
||||
def _do_export(
|
||||
self,
|
||||
*,
|
||||
use_filename_format=False,
|
||||
compare_checksums=False,
|
||||
delete=False,
|
||||
@ -146,6 +153,7 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
no_thumbnail=False,
|
||||
split_manifest=False,
|
||||
use_folder_prefix=False,
|
||||
data_only=False,
|
||||
):
|
||||
args = ["document_exporter", self.target]
|
||||
if use_filename_format:
|
||||
@ -162,6 +170,8 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
args += ["--split-manifest"]
|
||||
if use_folder_prefix:
|
||||
args += ["--use-folder-prefix"]
|
||||
if data_only:
|
||||
args += ["--data-only"]
|
||||
|
||||
call_command(*args)
|
||||
|
||||
@ -794,3 +804,25 @@ class TestExportImport(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
|
||||
manifest = self._do_export(use_filename_format=True)
|
||||
for obj in manifest:
|
||||
self.assertNotEqual(obj["model"], "auditlog.logentry")
|
||||
|
||||
def test_export_data_only(self):
    """
    GIVEN:
        - Sample documents copied into the media directory
        - Request to export with the data only option
    WHEN:
        - Export command is called
    THEN:
        - No document files are exported
        - Only the manifest and version files are written
    """
    media_documents = self.dirs.media_dir / "documents"

    # Swap the media documents folder for the sample documents
    shutil.rmtree(media_documents)
    shutil.copytree(self.SAMPLE_DIR / "documents", media_documents)

    # The returned manifest is not needed, only the files on disk matter
    self._do_export(data_only=True)

    # Manifest and version files only should be present in the exported directory
    self.assertFileCountInDir(self.target, 2)
|
||||
|
@ -200,6 +200,16 @@ class FileSystemAssertsMixin:
|
||||
|
||||
self.assertEqual(hash1, hash2, "File SHA256 mismatch")
|
||||
|
||||
def assertFileCountInDir(self, path: Union[PathLike, str], count: int):
    """
    Asserts that the directory at the given path directly contains
    exactly the given number of files.

    Only immediate children for which ``is_file()`` is true are counted;
    sub-directories, and anything nested inside them, are ignored.

    Args:
        path: The directory to inspect
        count: The expected number of files directly inside the directory

    Fails the test (through the TestCase assertions) if the path is not a
    directory or the number of files differs from the expected count.
    """
    path = Path(path).resolve()
    self.assertTrue(path.is_dir(), f"Path {path} is not a directory")
    # Count lazily instead of building a throwaway list just to take its length
    file_count = sum(1 for x in path.iterdir() if x.is_file())
    self.assertEqual(
        file_count,
        count,
        f"Path {path} contains {file_count} files instead of {count} files",
    )
|
||||
|
||||
|
||||
class ConsumerProgressMixin:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user