Merge branch 'dev'
This commit is contained in:
		
						commit
						8f5809d1fc
					
				
							
								
								
									
										82
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										82
									
								
								Dockerfile
									
									
									
									
									
								
							@ -1,82 +0,0 @@
 | 
			
		||||
###############################################################################
 | 
			
		||||
### Front end                                                               ###
 | 
			
		||||
###############################################################################
 | 
			
		||||
 | 
			
		||||
FROM node:current AS frontend
 | 
			
		||||
 | 
			
		||||
WORKDIR /usr/src/paperless/src-ui/
 | 
			
		||||
 | 
			
		||||
COPY src-ui/package* ./
 | 
			
		||||
RUN npm install
 | 
			
		||||
 | 
			
		||||
COPY src-ui .
 | 
			
		||||
RUN node_modules/.bin/ng build --prod --output-hashing none --sourceMap=false --output-path dist/paperless-ui
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
### Back end                                                                ###
 | 
			
		||||
###############################################################################
 | 
			
		||||
 | 
			
		||||
FROM ubuntu:20.04
 | 
			
		||||
 | 
			
		||||
WORKDIR /usr/src/paperless/
 | 
			
		||||
 | 
			
		||||
COPY Pipfile* ./
 | 
			
		||||
 | 
			
		||||
#Dependencies
 | 
			
		||||
RUN apt-get update \
 | 
			
		||||
  && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
 | 
			
		||||
		build-essential \
 | 
			
		||||
		curl \
 | 
			
		||||
		ghostscript \
 | 
			
		||||
		gnupg \
 | 
			
		||||
		imagemagick \
 | 
			
		||||
		libmagic-dev \
 | 
			
		||||
		libpoppler-cpp-dev \
 | 
			
		||||
		libpq-dev \
 | 
			
		||||
		optipng \
 | 
			
		||||
		python3 \
 | 
			
		||||
		python3-dev \
 | 
			
		||||
		python3-pip \
 | 
			
		||||
		sudo \
 | 
			
		||||
		tesseract-ocr \
 | 
			
		||||
		tesseract-ocr-eng \
 | 
			
		||||
		tesseract-ocr-deu \
 | 
			
		||||
		tesseract-ocr-fra \
 | 
			
		||||
		tesseract-ocr-ita \
 | 
			
		||||
		tesseract-ocr-spa \
 | 
			
		||||
		tzdata \
 | 
			
		||||
		unpaper \
 | 
			
		||||
	&& pip3 install --upgrade pipenv supervisor setuptools \
 | 
			
		||||
	&& pipenv install --system --deploy \
 | 
			
		||||
	&& pipenv --clear \
 | 
			
		||||
	&& apt-get -y purge build-essential python3-pip python3-dev \
 | 
			
		||||
	&& apt-get -y autoremove --purge \
 | 
			
		||||
	&& rm -rf /var/lib/apt/lists/* \
 | 
			
		||||
	&& mkdir /var/log/supervisord /var/run/supervisord
 | 
			
		||||
 | 
			
		||||
# copy scripts
 | 
			
		||||
# this fixes issues with imagemagick and PDF
 | 
			
		||||
COPY docker/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
 | 
			
		||||
COPY docker/gunicorn.conf.py ./
 | 
			
		||||
COPY docker/supervisord.conf /etc/supervisord.conf
 | 
			
		||||
COPY docker/docker-entrypoint.sh /sbin/docker-entrypoint.sh
 | 
			
		||||
 | 
			
		||||
# copy app
 | 
			
		||||
COPY src/ ./src/
 | 
			
		||||
COPY --from=frontend /usr/src/paperless/src-ui/dist/paperless-ui/ ./src/documents/static/frontend/
 | 
			
		||||
 | 
			
		||||
# add users, setup scripts
 | 
			
		||||
RUN addgroup --gid 1000 paperless \
 | 
			
		||||
	&& useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \
 | 
			
		||||
	&& chown -R paperless:paperless . \
 | 
			
		||||
	&& chmod 755 /sbin/docker-entrypoint.sh
 | 
			
		||||
 | 
			
		||||
WORKDIR /usr/src/paperless/src/
 | 
			
		||||
 | 
			
		||||
RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input
 | 
			
		||||
 | 
			
		||||
VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
 | 
			
		||||
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
 | 
			
		||||
CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord.conf"]
 | 
			
		||||
 | 
			
		||||
LABEL maintainer="Jonas Winkler <dev@jpwinkler.de>"
 | 
			
		||||
							
								
								
									
										5
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								Pipfile
									
									
									
									
									
								
							@ -3,6 +3,11 @@ url = "https://pypi.python.org/simple"
 | 
			
		||||
verify_ssl = true
 | 
			
		||||
name = "pypi"
 | 
			
		||||
 | 
			
		||||
[[source]]
 | 
			
		||||
url = "https://www.piwheels.org/simple"
 | 
			
		||||
verify_ssl = true
 | 
			
		||||
name = "piwheels"
 | 
			
		||||
 | 
			
		||||
[packages]
 | 
			
		||||
django = "~=3.1"
 | 
			
		||||
pillow = "*"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										51
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										51
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							@ -1,7 +1,7 @@
 | 
			
		||||
{
 | 
			
		||||
    "_meta": {
 | 
			
		||||
        "hash": {
 | 
			
		||||
            "sha256": "d6416e6844126b09200b9839a3abdcf3c24ef5cf70052b8f134d8bc804552c17"
 | 
			
		||||
            "sha256": "abc7e5f5a8d075d4b013ceafd06ca07f57e597f053d670f73449ba210511b114"
 | 
			
		||||
        },
 | 
			
		||||
        "pipfile-spec": 6,
 | 
			
		||||
        "requires": {},
 | 
			
		||||
@ -10,6 +10,11 @@
 | 
			
		||||
                "name": "pypi",
 | 
			
		||||
                "url": "https://pypi.python.org/simple",
 | 
			
		||||
                "verify_ssl": true
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
                "name": "piwheels",
 | 
			
		||||
                "url": "https://www.piwheels.org/simple",
 | 
			
		||||
                "verify_ssl": true
 | 
			
		||||
            }
 | 
			
		||||
        ]
 | 
			
		||||
    },
 | 
			
		||||
@ -102,6 +107,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "filemagic": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:b2fd77411975510e28673220c4b8868ed81b5eb5906339b6f4c233b32122d7d3",
 | 
			
		||||
                "sha256:e684359ef40820fe406f0ebc5bf8a78f89717bdb7fed688af68082d991d6dbf3"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -142,6 +148,7 @@
 | 
			
		||||
        "langdetect": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:363795ea005f1243c958e953245dac5d814fabdc025c9afa91588c5fa6b2fa83",
 | 
			
		||||
                "sha256:ae53a024643df713274c297c0795dbfb5a16b329902f8e543e7b2d7d45f699e4",
 | 
			
		||||
                "sha256:f37495e63607865e47deed08d78f7f8e58172658216ff954b2f14671bcd87740"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -162,6 +169,7 @@
 | 
			
		||||
                "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
 | 
			
		||||
                "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
 | 
			
		||||
                "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
 | 
			
		||||
                "sha256:5ddd1dfa2be066595c1993165b4cae84b9866b12339d0c903db7f21a094324a3",
 | 
			
		||||
                "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
 | 
			
		||||
                "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
 | 
			
		||||
                "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
 | 
			
		||||
@ -189,7 +197,8 @@
 | 
			
		||||
        },
 | 
			
		||||
        "pathtools": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0"
 | 
			
		||||
                "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0",
 | 
			
		||||
                "sha256:d77d982475e87f32b82157a43b09f0a5ef3e66c1d8f3c7eb8d2580e783cd8202"
 | 
			
		||||
            ],
 | 
			
		||||
            "version": "==0.1.2"
 | 
			
		||||
        },
 | 
			
		||||
@ -217,6 +226,7 @@
 | 
			
		||||
                "sha256:2fb113757a369a6cdb189f8df3226e995acfed0a8919a72416626af1a0a71140",
 | 
			
		||||
                "sha256:4b0ef2470c4979e345e4e0cc1bbac65fda11d0d7b789dbac035e4c6ce3f98adb",
 | 
			
		||||
                "sha256:59e903ca800c8cfd1ebe482349ec7c35687b95e98cefae213e271c8c7fffa021",
 | 
			
		||||
                "sha256:5a3342d34289715928c914ee7f389351eb37fa4857caa9297fc7948f2ed3e53d",
 | 
			
		||||
                "sha256:5abd653a23c35d980b332bc0431d39663b1709d64142e3652890df4c9b6970f6",
 | 
			
		||||
                "sha256:5f9403af9c790cc18411ea398a6950ee2def2a830ad0cfe6dc9122e6d528b302",
 | 
			
		||||
                "sha256:6b4a8fd632b4ebee28282a9fef4c341835a1aa8671e2770b6f89adc8e8c2703c",
 | 
			
		||||
@ -274,8 +284,10 @@
 | 
			
		||||
                "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4",
 | 
			
		||||
                "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449",
 | 
			
		||||
                "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da",
 | 
			
		||||
                "sha256:d9f3a909b59ac4a3ca9beb77716f4bce627276edb039a71d4e9ec4b7548536a0",
 | 
			
		||||
                "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a",
 | 
			
		||||
                "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c",
 | 
			
		||||
                "sha256:e7f5a465c6431c0ad8d4e69603ee3306e521a09d3c6af76a16bdb62946bdddf0",
 | 
			
		||||
                "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb",
 | 
			
		||||
                "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4",
 | 
			
		||||
                "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5"
 | 
			
		||||
@ -285,7 +297,8 @@
 | 
			
		||||
        },
 | 
			
		||||
        "pyocr": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179"
 | 
			
		||||
                "sha256:fa15adc7e1cf0d345a2990495fe125a947c6e09a60ddba0256a1c14b2e603179",
 | 
			
		||||
                "sha256:fd602af17b6e21985669aadc058a95f343ff921e962ed4aa6520ded32e4d1301"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
            "version": "==0.7.2"
 | 
			
		||||
@ -316,7 +329,10 @@
 | 
			
		||||
        },
 | 
			
		||||
        "python-levenshtein": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1"
 | 
			
		||||
                "sha256:033a11de5e3d19ea25c9302d11224e1a1898fe5abd23c61c7c360c25195e3eb1",
 | 
			
		||||
                "sha256:15e26882728c29ccdf74cfc6ac4b49fc22c08b44d152348cb0eb1ec4f3dbf9df",
 | 
			
		||||
                "sha256:3df5e5eb144570ecf5ad38864a2393068798328c7f05e7b167a49391d36a2db1",
 | 
			
		||||
                "sha256:7f049b3ddc4b525bd469febafb98bf5202f789b722e0e4ccbec2ffbe8c07d7b4"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
            "version": "==0.12.0"
 | 
			
		||||
@ -331,6 +347,7 @@
 | 
			
		||||
        "redis": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:0e7e0cfca8660dea8b7d5cd8c4f6c5e29e11f31158c0b0ae91a397f00e5a05a2",
 | 
			
		||||
                "sha256:3f1c7f166fa6c803613eec222224848a80f5e5b9c6af3aa82461506643034a7a",
 | 
			
		||||
                "sha256:432b788c4530cfe16d8d943a09d40ca6c16149727e4afe8c2c9d5580c59d9f24"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -360,7 +377,9 @@
 | 
			
		||||
                "sha256:749078d1eb89484db5f34b4012092ad14b327944ee7f1c4f74d6279a6e4d1884",
 | 
			
		||||
                "sha256:7913bd25f4ab274ba37bc97ad0e21c31004224ccb02765ad984eef43e04acc6c",
 | 
			
		||||
                "sha256:7a25fcbeae08f96a754b45bdc050e1fb94b95cab046bf56b016c25e9ab127b3e",
 | 
			
		||||
                "sha256:80ef188c0e47a6c964eed71c55a73c245f8daf9f0a4a9d804e91275afb468ca4",
 | 
			
		||||
                "sha256:83d6b356e116ca119db8e7c6fc2983289d87b27b3fac238cfe5dca529d884562",
 | 
			
		||||
                "sha256:842fb985b2b99a82a2b145b6bbd588c5f5cfd83693402920fcb985d515794666",
 | 
			
		||||
                "sha256:8b882a78c320478b12ff024e81dc7d43c1462aa4a3341c754ee65d857a521f85",
 | 
			
		||||
                "sha256:8f6a2229e8ad946e36815f2a03386bb8353d4bde368fdf8ca5f0cb97264d3b5c",
 | 
			
		||||
                "sha256:9801c4c1d9ae6a70aeb2128e5b4b68c45d4f0af0d1535500884d644fa9b768c6",
 | 
			
		||||
@ -384,6 +403,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "scikit-learn": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:090bbf144fd5823c1f2efa3e1a9bf180295b24294ca8f478e75b40ed54f8036e",
 | 
			
		||||
                "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca",
 | 
			
		||||
                "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc",
 | 
			
		||||
                "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea",
 | 
			
		||||
@ -423,6 +443,7 @@
 | 
			
		||||
                "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62",
 | 
			
		||||
                "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d",
 | 
			
		||||
                "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437",
 | 
			
		||||
                "sha256:b5e9d3e4474644915809d6aa1416ff20430a3ed9ae723a5d295da5ddb24985e2",
 | 
			
		||||
                "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2",
 | 
			
		||||
                "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54",
 | 
			
		||||
                "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474",
 | 
			
		||||
@ -468,6 +489,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "watchdog": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:034c85530b647486e8c8477410fe79476511282658f2ce496f97106d9e5acfb8",
 | 
			
		||||
                "sha256:4214e1379d128b0588021880ccaf40317ee156d4603ac388b9adcf29165e0c04"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -561,6 +583,7 @@
 | 
			
		||||
                "sha256:29a6272fec10623fcbe158fdf9abc7a5fa032048ac1d8631f14b50fbfc10d17f",
 | 
			
		||||
                "sha256:2b31f46bf7b31e6aa690d4c7a3d51bb262438c6dcb0d528adde446531d0d3bb7",
 | 
			
		||||
                "sha256:2d43af2be93ffbad25dd959899b5b809618a496926146ce98ee0b23683f8c51c",
 | 
			
		||||
                "sha256:3188a7dfd96f734a7498f37cde6598b1e9c084f1ca68bc1aa04e88db31168ab6",
 | 
			
		||||
                "sha256:381ead10b9b9af5f64646cd27107fb27b614ee7040bb1226f9c07ba96625cbb5",
 | 
			
		||||
                "sha256:47a11bdbd8ada9b7ee628596f9d97fbd3851bd9999d398e9436bd67376dbece7",
 | 
			
		||||
                "sha256:4d6a42744139a7fa5b46a264874a781e8694bb32f1d76d8137b68138686f1729",
 | 
			
		||||
@ -586,7 +609,8 @@
 | 
			
		||||
                "sha256:c851b35fc078389bc16b915a0a7c1d5923e12e2c5aeec58c52f4aa8085ac8237",
 | 
			
		||||
                "sha256:cb7df71de0af56000115eafd000b867d1261f786b5eebd88a0ca6360cccfaca7",
 | 
			
		||||
                "sha256:cedb2f9e1f990918ea061f28a0f0077a07702e3819602d3507e2ff98c8d20636",
 | 
			
		||||
                "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8"
 | 
			
		||||
                "sha256:e8caf961e1b1a945db76f1b5fa9c91498d15f545ac0ababbe575cfab185d3bd8",
 | 
			
		||||
                "sha256:ef221855191457fffeb909d5787d1807800ab4d0111f089e6c93ee68f577634d"
 | 
			
		||||
            ],
 | 
			
		||||
            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
 | 
			
		||||
            "version": "==5.3"
 | 
			
		||||
@ -608,6 +632,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "docopt": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:15fde8252aa9f2804171014d50d069ffbf42c7a50b7d74bcbb82bfd5700fcfc2",
 | 
			
		||||
                "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"
 | 
			
		||||
            ],
 | 
			
		||||
            "version": "==0.6.2"
 | 
			
		||||
@ -638,11 +663,11 @@
 | 
			
		||||
        },
 | 
			
		||||
        "faker": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:6afc461ab3f779c9c16e299fc731d775e39ea7e8e063b3053ee359ae198a15ca",
 | 
			
		||||
                "sha256:ce1c38823eb0f927567cde5bf2e7c8ca565c7a70316139342050ce2ca74b4026"
 | 
			
		||||
                "sha256:4d038ba51ae5e0a956d79cadd684d856e5750bfd608b61dad1807f8f08b1da49",
 | 
			
		||||
                "sha256:f260f0375a44cd1e1a735c9b8c9b914304f607b5eef431d20e098c7c2f5b50a6"
 | 
			
		||||
            ],
 | 
			
		||||
            "markers": "python_version >= '3.5'",
 | 
			
		||||
            "version": "==4.14.2"
 | 
			
		||||
            "version": "==4.16.0"
 | 
			
		||||
        },
 | 
			
		||||
        "filelock": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
@ -653,6 +678,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "idna": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:4a57a6379512ade94fa99e2fa46d3cd0f2f553040548d0e2958c6ed90ee48226",
 | 
			
		||||
                "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
 | 
			
		||||
                "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
 | 
			
		||||
            ],
 | 
			
		||||
@ -670,12 +696,14 @@
 | 
			
		||||
        "iniconfig": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3",
 | 
			
		||||
                "sha256:8647b85c03813b8680f4ae9c9db2fd7293f8591ea536a10d73d90f6eb4b10aac",
 | 
			
		||||
                "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"
 | 
			
		||||
            ],
 | 
			
		||||
            "version": "==1.1.1"
 | 
			
		||||
        },
 | 
			
		||||
        "jinja2": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:3f172970d5670703bd3812e8ca6459a9a7e069fa8e51b40195f83c81db191ec4",
 | 
			
		||||
                "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0",
 | 
			
		||||
                "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"
 | 
			
		||||
            ],
 | 
			
		||||
@ -689,8 +717,10 @@
 | 
			
		||||
                "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235",
 | 
			
		||||
                "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5",
 | 
			
		||||
                "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42",
 | 
			
		||||
                "sha256:19536834abffb3fa155017053c607cb835b2ecc6a3a2554a88043d991dffb736",
 | 
			
		||||
                "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff",
 | 
			
		||||
                "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b",
 | 
			
		||||
                "sha256:3d61f15e39611aacd91b7e71d903787da86d9e80896e683c0103fced9add7834",
 | 
			
		||||
                "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1",
 | 
			
		||||
                "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e",
 | 
			
		||||
                "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183",
 | 
			
		||||
@ -700,6 +730,7 @@
 | 
			
		||||
                "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15",
 | 
			
		||||
                "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1",
 | 
			
		||||
                "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e",
 | 
			
		||||
                "sha256:7952deddf24b85c88dab48f6ec366ac6e39d2761b5280f2f9594911e03fcd064",
 | 
			
		||||
                "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b",
 | 
			
		||||
                "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905",
 | 
			
		||||
                "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735",
 | 
			
		||||
@ -795,6 +826,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "pytest-env": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:33b4030383a021924fe3f3ba5ca4311990d8b1d02ca77389c2be020c4500f96a",
 | 
			
		||||
                "sha256:7e94956aef7f2764f3c147d216ce066bf6c42948bb9e293169b1b1c880a580c2"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -802,6 +834,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "pytest-forked": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:2d1bfc93ab65a28324eb0a63503bfb500c2da6916efede7a24b43a04970fe63c",
 | 
			
		||||
                "sha256:6aa9ac7e00ad1a539c41bec6d21011332de671e938c7637378ec9710204e37ca",
 | 
			
		||||
                "sha256:dc4147784048e70ef5d437951728825a131b81714b398d5d52f17c7c144d8815"
 | 
			
		||||
            ],
 | 
			
		||||
@ -810,6 +843,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "pytest-sugar": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:67a55a83c7b2717ad607704d3fe9004bb6543b54017ef82f9c6590acc38c1aec",
 | 
			
		||||
                "sha256:b1b2186b0a72aada6859bea2a5764145e3aaa2c1cfbb23c3a19b5f7b697563d3"
 | 
			
		||||
            ],
 | 
			
		||||
            "index": "pypi",
 | 
			
		||||
@ -927,6 +961,7 @@
 | 
			
		||||
        },
 | 
			
		||||
        "termcolor": {
 | 
			
		||||
            "hashes": [
 | 
			
		||||
                "sha256:19b1225d03bfb56571484caaa8521d8ec6e2473ae1640c9f48a48dda49417706",
 | 
			
		||||
                "sha256:1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
 | 
			
		||||
            ],
 | 
			
		||||
            "version": "==1.1.0"
 | 
			
		||||
 | 
			
		||||
@ -2,27 +2,25 @@
 | 
			
		||||
### Back end                                                                ###
 | 
			
		||||
###############################################################################
 | 
			
		||||
 | 
			
		||||
FROM ubuntu:20.04
 | 
			
		||||
FROM python:3.7-slim
 | 
			
		||||
 | 
			
		||||
WORKDIR /usr/src/paperless/
 | 
			
		||||
 | 
			
		||||
COPY Pipfile* ./
 | 
			
		||||
COPY requirements.txt ./
 | 
			
		||||
 | 
			
		||||
#Dependencies
 | 
			
		||||
RUN apt-get update \
 | 
			
		||||
  && DEBIAN_FRONTEND="noninteractive" apt-get -y --no-install-recommends install \
 | 
			
		||||
  && apt-get -y --no-install-recommends install \
 | 
			
		||||
		build-essential \
 | 
			
		||||
		curl \
 | 
			
		||||
		ghostscript \
 | 
			
		||||
		gnupg \
 | 
			
		||||
		imagemagick \
 | 
			
		||||
		libatlas-base-dev \
 | 
			
		||||
		libmagic-dev \
 | 
			
		||||
		libpoppler-cpp-dev \
 | 
			
		||||
		libpq-dev \
 | 
			
		||||
		optipng \
 | 
			
		||||
		python3 \
 | 
			
		||||
		python3-dev \
 | 
			
		||||
		python3-pip \
 | 
			
		||||
		sudo \
 | 
			
		||||
		tesseract-ocr \
 | 
			
		||||
		tesseract-ocr-eng \
 | 
			
		||||
@ -32,10 +30,9 @@ RUN apt-get update \
 | 
			
		||||
		tesseract-ocr-spa \
 | 
			
		||||
		tzdata \
 | 
			
		||||
		unpaper \
 | 
			
		||||
	&& pip3 install --upgrade pipenv supervisor setuptools \
 | 
			
		||||
	&& pipenv install --system --deploy \
 | 
			
		||||
	&& pipenv --clear \
 | 
			
		||||
	&& apt-get -y purge build-essential python3-pip python3-dev \
 | 
			
		||||
	&& pip3 install --upgrade supervisor setuptools \
 | 
			
		||||
	&& pip install --no-cache-dir -r requirements.txt \
 | 
			
		||||
	&& apt-get -y purge build-essential \
 | 
			
		||||
	&& apt-get -y autoremove --purge \
 | 
			
		||||
	&& rm -rf /var/lib/apt/lists/* \
 | 
			
		||||
	&& mkdir /var/log/supervisord /var/run/supervisord
 | 
			
		||||
 | 
			
		||||
@ -8,16 +8,40 @@ Administration
 | 
			
		||||
Making backups
 | 
			
		||||
##############
 | 
			
		||||
 | 
			
		||||
.. warning::
 | 
			
		||||
Multiple options exist for making backups of your paperless instance,
 | 
			
		||||
depending on how you installed paperless.
 | 
			
		||||
 | 
			
		||||
    This section is not updated to paperless-ng yet, the exporter is a valid tool
 | 
			
		||||
    for backups though.
 | 
			
		||||
Before making backups, make sure that paperless is not running.
 | 
			
		||||
 | 
			
		||||
So you're bored of this whole project, or you want to make a remote backup of
 | 
			
		||||
your files for whatever reason.  This is easy to do, simply use the
 | 
			
		||||
:ref:`exporter <utilities-exporter>` to dump your documents and database out
 | 
			
		||||
into an arbitrary directory.
 | 
			
		||||
Options available to any installation of paperless:
 | 
			
		||||
 | 
			
		||||
*   Use the :ref:`document exporter <utilities-exporter>`.
 | 
			
		||||
    The document exporter exports all your documents, thumbnails and
 | 
			
		||||
    metadata to a specific folder. You may import your documents into a
 | 
			
		||||
    fresh instance of paperless again or store your documents in another
 | 
			
		||||
    DMS with this export.
 | 
			
		||||
 | 
			
		||||
Options available to docker installations:
 | 
			
		||||
 | 
			
		||||
*   Backup the docker volumes. These usually reside within
 | 
			
		||||
    ``/var/lib/docker/volumes`` on the host and you need to be root in order
 | 
			
		||||
    to access them.
 | 
			
		||||
 | 
			
		||||
    Paperless uses 3 volumes:
 | 
			
		||||
 | 
			
		||||
    *   ``paperless_media``: This is where your documents are stored.
 | 
			
		||||
    *   ``paperless_data``: This is where auxiliary data is stored. This
 | 
			
		||||
        folder also contains the SQLite database, if you use it.
 | 
			
		||||
    *   ``paperless_pgdata``: Exists only if you use PostgreSQL and contains
 | 
			
		||||
        the database.
 | 
			
		||||
 | 
			
		||||
Options available to bare-metal and non-docker installations:
 | 
			
		||||
 | 
			
		||||
*   Backup the entire paperless folder. This ensures that if your paperless instance
 | 
			
		||||
    crashes at some point or your disk fails, you can simply copy the folder back
 | 
			
		||||
    into place and it works.
 | 
			
		||||
 | 
			
		||||
    When using PostgreSQL, you'll also have to backup the database.
 | 
			
		||||
 | 
			
		||||
.. _migrating-restoring:
 | 
			
		||||
 | 
			
		||||
@ -25,6 +49,8 @@ Restoring
 | 
			
		||||
=========
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _administration-updating:
 | 
			
		||||
 | 
			
		||||
Updating paperless
 | 
			
		||||
 | 
			
		||||
@ -128,6 +128,8 @@ consumer.  Once complete, you should see the newly-created document,
 | 
			
		||||
automatically tagged with the appropriate data.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.. _advanced-automatic_matching:
 | 
			
		||||
 | 
			
		||||
Automatic matching
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
@ -175,8 +177,6 @@ then put the path to that script in ``paperless.conf`` with the variable name
 | 
			
		||||
of either ``PAPERLESS_PRE_CONSUME_SCRIPT`` or
 | 
			
		||||
``PAPERLESS_POST_CONSUME_SCRIPT``.
 | 
			
		||||
 | 
			
		||||
.. TODO HYPEREF TO CONFIG
 | 
			
		||||
 | 
			
		||||
.. important::
 | 
			
		||||
 | 
			
		||||
    These scripts are executed in a **blocking** process, which means that if
 | 
			
		||||
 | 
			
		||||
@ -96,6 +96,8 @@ paperless-ng 0.9.0
 | 
			
		||||
    sqlite.
 | 
			
		||||
  * ``PAPERLESS_OCR_THREADS`` is gone and replaced with ``PAPERLESS_TASK_WORKERS`` and
 | 
			
		||||
    ``PAPERLESS_THREADS_PER_WORKER``. Refer to the config example for details.
 | 
			
		||||
  * ``PAPERLESS_OPTIMIZE_THUMBNAILS`` allows you to disable or enable thumbnail
 | 
			
		||||
    optimization. This is useful on less powerful devices.
 | 
			
		||||
 | 
			
		||||
* Many more small changes here and there. The usual stuff.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										34
									
								
								docs/faq.rst
									
									
									
									
									
								
							
							
						
						
									
										34
									
								
								docs/faq.rst
									
									
									
									
									
								
							@ -23,27 +23,35 @@ is
 | 
			
		||||
 | 
			
		||||
**Q:** *Will paperless-ng run on Raspberry Pi?*
 | 
			
		||||
 | 
			
		||||
**A:** The short answer is yes. The long answer is that certain parts of
 | 
			
		||||
**A:** The short answer is yes. I've tested it on a Raspberry Pi 3 B.
 | 
			
		||||
The long answer is that certain parts of
 | 
			
		||||
Paperless will run very slowly, such as the tesseract OCR. On Raspberry Pi,
 | 
			
		||||
try to OCR documents before feeding them into paperless so that paperless can
 | 
			
		||||
reuse the text. The web interface should be a lot snappier, since it runs
 | 
			
		||||
in your browser and paperless has to do much less work to serve the data.
 | 
			
		||||
 | 
			
		||||
.. note::
 | 
			
		||||
    
 | 
			
		||||
    Consider setting ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to false to speed up
 | 
			
		||||
    the consumption process. This takes quite a bit of time on Raspberry Pi.
 | 
			
		||||
 | 
			
		||||
.. note::
 | 
			
		||||
    
 | 
			
		||||
    Updating the :ref:`automatic matching algorithm <advanced-automatic_matching>`
 | 
			
		||||
    takes quite a bit of time. However, the update mechanism checks if your
 | 
			
		||||
    data has changed before doing the heavy lifting. If you experience the 
 | 
			
		||||
    algorithm taking too much cpu time, consider changing the schedule in the
 | 
			
		||||
    admin interface to daily or weekly. You can also manually invoke the task
 | 
			
		||||
    by changing the date and time of the next run to today/now.
 | 
			
		||||
 | 
			
		||||
    The actual matching of the algorithm is fast and works on Raspberry Pi as 
 | 
			
		||||
    well as on any other device.
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
**Q:** *How do I install paperless-ng on Raspberry Pi?*
 | 
			
		||||
 | 
			
		||||
**A:** There is no docker image for ARM available. If you know how to build
 | 
			
		||||
that automatically, I'm all ears. For now, you have to grab the latest release
 | 
			
		||||
archive from the project page and build the image yourself. The release comes
 | 
			
		||||
with the front end already compiled, so you don't have to do this on the Pi.
 | 
			
		||||
 | 
			
		||||
You may encounter some issues during the build:
 | 
			
		||||
 | 
			
		||||
.. code:: shell-session
 | 
			
		||||
 | 
			
		||||
    W: GPG error: http://ports.ubuntu.com/ubuntu-ports focal InRelease: At least one invalid signature was encountered.
 | 
			
		||||
    E: The repository 'http://ports.ubuntu.com/ubuntu-ports focal InRelease' is not signed.
 | 
			
		||||
    N: Updating from such a repository can't be done securely, and is therefore disabled by default.
 | 
			
		||||
    N: See apt-secure(8) manpage for repository creation and user configuration details.
 | 
			
		||||
 | 
			
		||||
If this happens, look at `this thread <https://askubuntu.com/questions/1263284/>`_.
 | 
			
		||||
You will need to update docker to the latest version to fix this issue.
 | 
			
		||||
 | 
			
		||||
@ -10,7 +10,7 @@
 | 
			
		||||
# This is required for processing scheduled tasks such as email fetching, index
 | 
			
		||||
# optimization and for training the automatic document matcher.
 | 
			
		||||
# Defaults to localhost:6379.
 | 
			
		||||
#PAPERLESS_REDIS="redis://localhost:6379"
 | 
			
		||||
#PAPERLESS_REDIS=redis://localhost:6379
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
@ -22,15 +22,15 @@
 | 
			
		||||
# configuration for this is already done inside the docker-compose.env file.
 | 
			
		||||
 | 
			
		||||
#Set PAPERLESS_DBHOST and postgresql will be used instead of sqlite.
 | 
			
		||||
#PAPERLESS_DBHOST="localhost"
 | 
			
		||||
#PAPERLESS_DBHOST=localhost
 | 
			
		||||
 | 
			
		||||
#Adjust port if necessary
 | 
			
		||||
#PAPERLESS_DBPORT=
 | 
			
		||||
 | 
			
		||||
#name, user and pass all default to "paperless"
 | 
			
		||||
#PAPERLESS_DBNAME="paperless"
 | 
			
		||||
#PAPERLESS_DBUSER="paperless"
 | 
			
		||||
#PAPERLESS_DBPASS="paperless"
 | 
			
		||||
#PAPERLESS_DBNAME=paperless
 | 
			
		||||
#PAPERLESS_DBUSER=paperless
 | 
			
		||||
#PAPERLESS_DBPASS=paperless
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
@ -40,23 +40,23 @@
 | 
			
		||||
# This is where your documents should go to be consumed.  Make sure that it exists
 | 
			
		||||
# and that the user running the paperless service can read/write its contents
 | 
			
		||||
# before you start Paperless.
 | 
			
		||||
PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
PAPERLESS_CONSUMPTION_DIR=../consume
 | 
			
		||||
 | 
			
		||||
# This is where paperless stores all its data (search index, sqlite database,
 | 
			
		||||
# classification model, etc).
 | 
			
		||||
#PAPERLESS_DATA_DIR="../data"
 | 
			
		||||
#PAPERLESS_DATA_DIR=../data
 | 
			
		||||
 | 
			
		||||
# This is where your documents and thumbnails are stored.
 | 
			
		||||
#PAPERLESS_MEDIA_ROOT="../media"
 | 
			
		||||
#PAPERLESS_MEDIA_ROOT=../media
 | 
			
		||||
 | 
			
		||||
# Override the default STATIC_ROOT here.  This is where all static files
 | 
			
		||||
# created using "collectstatic" manager command are stored.
 | 
			
		||||
#PAPERLESS_STATICDIR="../static"
 | 
			
		||||
#PAPERLESS_STATICDIR=../static
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Override the STATIC_URL here.  Unless you're hosting Paperless off a
 | 
			
		||||
# subdomain like /paperless/, you probably don't need to change this.
 | 
			
		||||
#PAPERLESS_STATIC_URL="/static/"
 | 
			
		||||
#PAPERLESS_STATIC_URL=/static/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Specify a filename format for the document (directories are supported)
 | 
			
		||||
@ -69,7 +69,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
# * {tags[INDEX]} If your tags are strings, select the tag by index
 | 
			
		||||
# Uniqueness of filenames is ensured, as an incrementing counter is attached
 | 
			
		||||
# to each filename.
 | 
			
		||||
#PAPERLESS_FILENAME_FORMAT=""
 | 
			
		||||
#PAPERLESS_FILENAME_FORMAT=
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
####                              Security                                 ####
 | 
			
		||||
@ -77,10 +77,12 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
 | 
			
		||||
# Controls whether django's debug mode is enabled. Disable this on production
 | 
			
		||||
# systems. Debug mode is disabled by default.
 | 
			
		||||
#PAPERLESS_DEBUG="false"
 | 
			
		||||
#PAPERLESS_DEBUG=false
 | 
			
		||||
 | 
			
		||||
# GnuPG encryption is deprecated and will be removed in future versions.
 | 
			
		||||
#
 | 
			
		||||
# Don't use it. It does not provide any security at all.
 | 
			
		||||
#
 | 
			
		||||
# Paperless can be instructed to attempt to encrypt your PDF files with GPG
 | 
			
		||||
# using the PAPERLESS_PASSPHRASE specified below.  If however you're not
 | 
			
		||||
# concerned about encrypting these files (for example if you have disk
 | 
			
		||||
@ -93,13 +95,13 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
# you've since changed it to a new one.
 | 
			
		||||
#
 | 
			
		||||
# The default is to not use encryption at all.
 | 
			
		||||
#PAPERLESS_PASSPHRASE="secret"
 | 
			
		||||
#PAPERLESS_PASSPHRASE=secret
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# The secret key has a default that should be fine so long as you're hosting
 | 
			
		||||
# Paperless on a closed network.  However, if you're putting this anywhere
 | 
			
		||||
# public, you should change the key to something unique and verbose.
 | 
			
		||||
#PAPERLESS_SECRET_KEY="change-me"
 | 
			
		||||
#PAPERLESS_SECRET_KEY=change-me
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# If you're planning on putting Paperless on the open internet, then you
 | 
			
		||||
@ -109,19 +111,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
#
 | 
			
		||||
# Just remember that this is a comma-separated list, so "example.com" is fine,
 | 
			
		||||
# as is "example.com,www.example.com", but NOT " example.com" or "example.com,"
 | 
			
		||||
#PAPERLESS_ALLOWED_HOSTS="example.com,www.example.com"
 | 
			
		||||
#PAPERLESS_ALLOWED_HOSTS=example.com,www.example.com
 | 
			
		||||
 | 
			
		||||
# If you decide to use the Paperless API in an ajax call, you need to add your
 | 
			
		||||
# servers to the list of allowed hosts that can do CORS calls. By default
 | 
			
		||||
# Paperless allows calls from localhost:8080, but if you'd like to change that,
 | 
			
		||||
# you can set this value to a comma-separated list.
 | 
			
		||||
#PAPERLESS_CORS_ALLOWED_HOSTS="localhost:8080,example.com,localhost:8000"
 | 
			
		||||
#PAPERLESS_CORS_ALLOWED_HOSTS=localhost:8080,example.com,localhost:8000
 | 
			
		||||
 | 
			
		||||
# To host paperless under a subpath url like example.com/paperless you set
 | 
			
		||||
# this value to /paperless. No trailing slash!
 | 
			
		||||
#
 | 
			
		||||
# https://docs.djangoproject.com/en/1.11/ref/settings/#force-script-name
 | 
			
		||||
#PAPERLESS_FORCE_SCRIPT_NAME=""
 | 
			
		||||
#PAPERLESS_FORCE_SCRIPT_NAME=
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
####                          Software Tweaks                              ####
 | 
			
		||||
@ -158,14 +160,19 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
 | 
			
		||||
# When the consumer detects a duplicate document, it will not touch the
 | 
			
		||||
# original document. This default behavior can be changed here.
 | 
			
		||||
#PAPERLESS_CONSUMER_DELETE_DUPLICATES="false"
 | 
			
		||||
#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
 | 
			
		||||
 | 
			
		||||
# Use optipng to optimize thumbnails. This usually reduces the size of
 | 
			
		||||
# thumbnails by about 20%, but uses considerable compute time during
 | 
			
		||||
# consumption.
 | 
			
		||||
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
 | 
			
		||||
 | 
			
		||||
# After a document is consumed, Paperless can trigger an arbitrary script if
 | 
			
		||||
# you like.  This script will be passed a number of arguments for you to work
 | 
			
		||||
# with.  The default is blank, which means nothing will be executed.  For more
 | 
			
		||||
# information, take a look at the docs:
 | 
			
		||||
# http://paperless.readthedocs.org/en/latest/consumption.html#hooking-into-the-consumption-process
 | 
			
		||||
#PAPERLESS_POST_CONSUME_SCRIPT="/path/to/an/arbitrary/script.sh"
 | 
			
		||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
			
		||||
 | 
			
		||||
# By default, paperless will check the document text for document date information.
 | 
			
		||||
# Uncomment the line below to enable checking the document filename for date
 | 
			
		||||
@ -173,7 +180,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
# https://dateparser.readthedocs.io/en/latest/#settings. The filename will be
 | 
			
		||||
# checked first, and if nothing is found, the document text will be checked
 | 
			
		||||
# as normal.
 | 
			
		||||
#PAPERLESS_FILENAME_DATE_ORDER="YMD"
 | 
			
		||||
#PAPERLESS_FILENAME_DATE_ORDER=YMD
 | 
			
		||||
 | 
			
		||||
# Sometimes devices won't create filenames which can be parsed properly
 | 
			
		||||
# by the filename parser (see
 | 
			
		||||
@ -243,7 +250,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
 | 
			
		||||
# By default Paperless does not OCR a document if the text can be retrieved from
 | 
			
		||||
# the document directly. Set to true to always OCR documents.
 | 
			
		||||
#PAPERLESS_OCR_ALWAYS="false"
 | 
			
		||||
#PAPERLESS_OCR_ALWAYS=false
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
###############################################################################
 | 
			
		||||
@ -271,7 +278,7 @@ PAPERLESS_CONSUMPTION_DIR="../consume"
 | 
			
		||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 | 
			
		||||
 | 
			
		||||
# Ghostscript
 | 
			
		||||
#PAPERLESS_GS_BINARY = /usr/bin/gs
 | 
			
		||||
#PAPERLESS_GS_BINARY=/usr/bin/gs
 | 
			
		||||
 | 
			
		||||
# Unpaper
 | 
			
		||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
 | 
			
		||||
 | 
			
		||||
@ -24,12 +24,17 @@ then
 | 
			
		||||
	rm "$PAPERLESS_DIST" -r
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
mkdir "$PAPERLESS_DIST"
 | 
			
		||||
mkdir "$PAPERLESS_DIST_APP"
 | 
			
		||||
mkdir "$PAPERLESS_DIST_APP/docker"
 | 
			
		||||
 | 
			
		||||
# setup dependencies.
 | 
			
		||||
 | 
			
		||||
cd "$PAPERLESS_ROOT"
 | 
			
		||||
 | 
			
		||||
pipenv clean
 | 
			
		||||
pipenv install --dev
 | 
			
		||||
pipenv lock --keep-outdated -r > "$PAPERLESS_DIST_APP/requirements.txt"
 | 
			
		||||
 | 
			
		||||
# test if the application works.
 | 
			
		||||
 | 
			
		||||
@ -44,10 +49,6 @@ make clean html
 | 
			
		||||
 | 
			
		||||
# copy stuff into place
 | 
			
		||||
 | 
			
		||||
mkdir "$PAPERLESS_DIST"
 | 
			
		||||
mkdir "$PAPERLESS_DIST_APP"
 | 
			
		||||
mkdir "$PAPERLESS_DIST_APP/docker"
 | 
			
		||||
 | 
			
		||||
# the application itself
 | 
			
		||||
 | 
			
		||||
cp "$PAPERLESS_ROOT/.env" \
 | 
			
		||||
@ -92,8 +93,6 @@ cd "$PAPERLESS_DIST_APP"
 | 
			
		||||
 | 
			
		||||
docker build . -t "jonaswinkler/paperless-ng:$VERSION"
 | 
			
		||||
 | 
			
		||||
docker push "jonaswinkler/paperless-ng:$VERSION"
 | 
			
		||||
 | 
			
		||||
# works. package the app!
 | 
			
		||||
 | 
			
		||||
cd "$PAPERLESS_DIST"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										23
									
								
								scripts/push-release.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										23
									
								
								scripts/push-release.sh
									
									
									
									
									
										Executable file
									
								
							@ -0,0 +1,23 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
set -e
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
VERSION=$1
 | 
			
		||||
 | 
			
		||||
if [ -z "$VERSION" ]
 | 
			
		||||
then
 | 
			
		||||
	echo "Need a version string."
 | 
			
		||||
	exit 1
 | 
			
		||||
fi
 | 
			
		||||
 | 
			
		||||
# source root directory of paperless
 | 
			
		||||
PAPERLESS_ROOT=$(git rev-parse --show-toplevel)
 | 
			
		||||
 | 
			
		||||
# output directory
 | 
			
		||||
PAPERLESS_DIST="$PAPERLESS_ROOT/dist"
 | 
			
		||||
PAPERLESS_DIST_APP="$PAPERLESS_DIST/paperless-ng"
 | 
			
		||||
 | 
			
		||||
cd "$PAPERLESS_DIST_APP"
 | 
			
		||||
 | 
			
		||||
docker push "jonaswinkler/paperless-ng:$VERSION"
 | 
			
		||||
@ -132,6 +132,28 @@
 | 
			
		||||
            </a>
 | 
			
		||||
          </li>
 | 
			
		||||
        </ul>
 | 
			
		||||
 | 
			
		||||
        <h6 class="sidebar-heading d-flex justify-content-between align-items-center px-3 mt-4 mb-1 text-muted">
 | 
			
		||||
          <span>Misc</span>
 | 
			
		||||
        </h6>
 | 
			
		||||
        <ul class="nav flex-column mb-2">
 | 
			
		||||
          <li class="nav-item">
 | 
			
		||||
            <a class="nav-link" href="https://paperless-ng.readthedocs.io/en/latest/">
 | 
			
		||||
              <svg class="sidebaricon" fill="currentColor">
 | 
			
		||||
                <use xlink:href="assets/bootstrap-icons.svg#question-circle"/>
 | 
			
		||||
              </svg>
 | 
			
		||||
              Documentation
 | 
			
		||||
            </a>
 | 
			
		||||
          </li>
 | 
			
		||||
          <li class="nav-item">
 | 
			
		||||
            <a class="nav-link" href="https://github.com/jonaswinkler/paperless-ng">
 | 
			
		||||
              <svg class="sidebaricon" fill="currentColor">
 | 
			
		||||
                <use xlink:href="assets/bootstrap-icons.svg#link"/>
 | 
			
		||||
              </svg>
 | 
			
		||||
              Github
 | 
			
		||||
            </a>
 | 
			
		||||
          </li>
 | 
			
		||||
        </ul>
 | 
			
		||||
      </div>
 | 
			
		||||
    </nav>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,6 @@
 | 
			
		||||
<div class="row pt-3 pb-2 mb-3 border-bottom align-items-center">
 | 
			
		||||
<div class="row pt-3 pb-1 mb-3 border-bottom align-items-center" >
 | 
			
		||||
  <div class="col text-truncate">
 | 
			
		||||
    <h1 class="h2 text-truncate">{{title}}</h1>
 | 
			
		||||
    <h1 class="h2 text-truncate" style="line-height: 1.4">{{title}}</h1>
 | 
			
		||||
  </div>
 | 
			
		||||
  <div class="btn-toolbar col-auto">
 | 
			
		||||
    <ng-content></ng-content>
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,7 @@
 | 
			
		||||
.log-entry-10 {
 | 
			
		||||
  color: lightslategray !important;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.log-entry-30 {
 | 
			
		||||
  color: yellow !important;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -3,7 +3,6 @@ import hashlib
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import uuid
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.db import transaction
 | 
			
		||||
@ -12,6 +11,7 @@ from django.utils import timezone
 | 
			
		||||
from paperless.db import GnuPG
 | 
			
		||||
from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
 | 
			
		||||
from .file_handling import generate_filename, create_source_path_directory
 | 
			
		||||
from .loggers import LoggingMixin
 | 
			
		||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
 | 
			
		||||
from .parsers import ParseError, get_parser_class
 | 
			
		||||
from .signals import (
 | 
			
		||||
@ -24,12 +24,10 @@ class ConsumerError(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Consumer:
 | 
			
		||||
class Consumer(LoggingMixin):
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
 | 
			
		||||
        self.logger = logging.getLogger(__name__)
 | 
			
		||||
        self.logging_group = None
 | 
			
		||||
        super().__init__()
 | 
			
		||||
        self.path = None
 | 
			
		||||
        self.filename = None
 | 
			
		||||
        self.override_title = None
 | 
			
		||||
@ -74,11 +72,6 @@ class Consumer:
 | 
			
		||||
        os.makedirs(settings.THUMBNAIL_DIR, exist_ok=True)
 | 
			
		||||
        os.makedirs(settings.ORIGINALS_DIR, exist_ok=True)
 | 
			
		||||
 | 
			
		||||
    def log(self, level, message):
 | 
			
		||||
        getattr(self.logger, level)(message, extra={
 | 
			
		||||
            "group": self.logging_group
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    def try_consume_file(self,
 | 
			
		||||
                         path,
 | 
			
		||||
                         override_filename=None,
 | 
			
		||||
@ -100,7 +93,7 @@ class Consumer:
 | 
			
		||||
        # this is for grouping logging entries for this particular file
 | 
			
		||||
        # together.
 | 
			
		||||
 | 
			
		||||
        self.logging_group = uuid.uuid4()
 | 
			
		||||
        self.renew_logging_group()
 | 
			
		||||
 | 
			
		||||
        # Make sure that preconditions for consuming the file are met.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -86,7 +86,7 @@ def generate_filename(document):
 | 
			
		||||
                added_day=document.added.day if document.added else "none",
 | 
			
		||||
                tags=tags,
 | 
			
		||||
            )
 | 
			
		||||
    except (ValueError, KeyError, IndexError) as e:
 | 
			
		||||
    except (ValueError, KeyError, IndexError):
 | 
			
		||||
        logging.getLogger(__name__).warning("Invalid PAPERLESS_FILENAME_FORMAT: {}, falling back to default,".format(settings.PAPERLESS_FILENAME_FORMAT))
 | 
			
		||||
 | 
			
		||||
    # Always append the primary key to guarantee uniqueness of filename
 | 
			
		||||
 | 
			
		||||
@ -32,6 +32,9 @@ class UploadForm(forms.Form):
 | 
			
		||||
 | 
			
		||||
        t = int(mktime(datetime.now().timetuple()))
 | 
			
		||||
 | 
			
		||||
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
 | 
			
		||||
 | 
			
		||||
        # TODO: don't just append pdf. This is here for that weird regex check at the start of the consumer.
 | 
			
		||||
        with tempfile.NamedTemporaryFile(prefix="paperless-upload-", suffix=".pdf", dir=settings.SCRATCH_DIR, delete=False) as f:
 | 
			
		||||
 | 
			
		||||
            f.write(document)
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
import logging
 | 
			
		||||
import uuid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PaperlessHandler(logging.Handler):
 | 
			
		||||
@ -13,3 +14,19 @@ class PaperlessHandler(logging.Handler):
 | 
			
		||||
            kwargs["group"] = record.group
 | 
			
		||||
 | 
			
		||||
        Log.objects.create(**kwargs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LoggingMixin:
 | 
			
		||||
 | 
			
		||||
    logging_group = None
 | 
			
		||||
 | 
			
		||||
    def renew_logging_group(self):
 | 
			
		||||
        self.logging_group = uuid.uuid4()
 | 
			
		||||
 | 
			
		||||
    def log(self, level, message):
 | 
			
		||||
        target = ".".join([self.__class__.__module__, self.__class__.__name__])
 | 
			
		||||
        logger = logging.getLogger(target)
 | 
			
		||||
 | 
			
		||||
        getattr(logger, level)(message, extra={
 | 
			
		||||
            "group": self.logging_group
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,4 @@
 | 
			
		||||
# Generated by Django 3.1.3 on 2020-11-07 12:35
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.db import migrations, models
 | 
			
		||||
import django.db.models.deletion
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -20,6 +20,7 @@ from django.utils import timezone
 | 
			
		||||
# - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 | 
			
		||||
# - MONTH ZZZZ, with ZZZZ being 4 digits
 | 
			
		||||
# - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
 | 
			
		||||
from documents.loggers import LoggingMixin
 | 
			
		||||
from documents.signals import document_consumer_declaration
 | 
			
		||||
 | 
			
		||||
# TODO: isn't there a date parsing library for this?
 | 
			
		||||
@ -101,17 +102,17 @@ class ParseError(Exception):
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class DocumentParser:
 | 
			
		||||
class DocumentParser(LoggingMixin):
 | 
			
		||||
    """
 | 
			
		||||
    Subclass this to make your own parser.  Have a look at
 | 
			
		||||
    `paperless_tesseract.parsers` for inspiration.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, path, logging_group):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
        self.logging_group = logging_group
 | 
			
		||||
        self.document_path = path
 | 
			
		||||
        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
 | 
			
		||||
        self.logger = logging.getLogger(__name__)
 | 
			
		||||
        self.logging_group = logging_group
 | 
			
		||||
 | 
			
		||||
    def get_thumbnail(self):
 | 
			
		||||
        """
 | 
			
		||||
@ -121,6 +122,7 @@ class DocumentParser:
 | 
			
		||||
 | 
			
		||||
    def optimise_thumbnail(self, in_path):
 | 
			
		||||
 | 
			
		||||
        if settings.OPTIMIZE_THUMBNAILS:
 | 
			
		||||
            out_path = os.path.join(self.tempdir, "optipng.png")
 | 
			
		||||
 | 
			
		||||
            args = (settings.OPTIPNG_BINARY, "-silent", "-o5", in_path, "-out", out_path)
 | 
			
		||||
@ -131,6 +133,8 @@ class DocumentParser:
 | 
			
		||||
                raise ParseError("Optipng failed at {}".format(args))
 | 
			
		||||
 | 
			
		||||
            return out_path
 | 
			
		||||
        else:
 | 
			
		||||
            return in_path
 | 
			
		||||
 | 
			
		||||
    def get_optimised_thumbnail(self):
 | 
			
		||||
        return self.optimise_thumbnail(self.get_thumbnail())
 | 
			
		||||
@ -222,11 +226,6 @@ class DocumentParser:
 | 
			
		||||
 | 
			
		||||
        return date
 | 
			
		||||
 | 
			
		||||
    def log(self, level, message):
 | 
			
		||||
        getattr(self.logger, level)(message, extra={
 | 
			
		||||
            "group": self.logging_group
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    def cleanup(self):
 | 
			
		||||
        self.log("debug", "Deleting directory {}".format(self.tempdir))
 | 
			
		||||
        shutil.rmtree(self.tempdir)
 | 
			
		||||
 | 
			
		||||
@ -2,11 +2,10 @@ import os
 | 
			
		||||
import shutil
 | 
			
		||||
import tempfile
 | 
			
		||||
from unittest import mock
 | 
			
		||||
from unittest.mock import MagicMock
 | 
			
		||||
 | 
			
		||||
from django.contrib.auth.models import User
 | 
			
		||||
from django.test import override_settings
 | 
			
		||||
from rest_framework.test import APITestCase, APIClient
 | 
			
		||||
from rest_framework.test import APITestCase
 | 
			
		||||
 | 
			
		||||
from documents.models import Document, Correspondent, DocumentType, Tag
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -80,6 +80,6 @@ class TestClassifier(TestCase):
 | 
			
		||||
 | 
			
		||||
        self.classifier.save_classifier()
 | 
			
		||||
 | 
			
		||||
        newClassifier = DocumentClassifier()
 | 
			
		||||
        newClassifier.reload()
 | 
			
		||||
        self.assertFalse(newClassifier.train())
 | 
			
		||||
        new_classifier = DocumentClassifier()
 | 
			
		||||
        new_classifier.reload()
 | 
			
		||||
        self.assertFalse(new_classifier.train())
 | 
			
		||||
 | 
			
		||||
@ -5,8 +5,6 @@ import tempfile
 | 
			
		||||
from unittest import mock
 | 
			
		||||
from unittest.mock import MagicMock
 | 
			
		||||
 | 
			
		||||
from django.conf import settings
 | 
			
		||||
from django.db import DatabaseError
 | 
			
		||||
from django.test import TestCase, override_settings
 | 
			
		||||
 | 
			
		||||
from ..consumer import Consumer, ConsumerError
 | 
			
		||||
@ -504,9 +502,9 @@ class TestConsumer(TestCase):
 | 
			
		||||
 | 
			
		||||
    def testOverrideFilename(self):
 | 
			
		||||
        filename = self.get_test_file()
 | 
			
		||||
        overrideFilename = "My Bank - Statement for November.pdf"
 | 
			
		||||
        override_filename = "My Bank - Statement for November.pdf"
 | 
			
		||||
 | 
			
		||||
        document = self.consumer.try_consume_file(filename, override_filename=overrideFilename)
 | 
			
		||||
        document = self.consumer.try_consume_file(filename, override_filename=override_filename)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(document.correspondent.name, "My Bank")
 | 
			
		||||
        self.assertEqual(document.title, "Statement for November")
 | 
			
		||||
 | 
			
		||||
@ -72,11 +72,11 @@ def binaries_check(app_configs, **kwargs):
 | 
			
		||||
@register()
 | 
			
		||||
def debug_mode_check(app_configs, **kwargs):
 | 
			
		||||
    if settings.DEBUG:
 | 
			
		||||
        return [Warning("DEBUG mode is enabled. Disable Debug mode. "
 | 
			
		||||
                        "This is a serious security "
 | 
			
		||||
                        "issue, since it puts security overides in place which"
 | 
			
		||||
                        "are meant to be only used during development. This"
 | 
			
		||||
                        "also means that paperless will tell anyone various"
 | 
			
		||||
        return [Warning(
 | 
			
		||||
            "DEBUG mode is enabled. Disable Debug mode. This is a serious "
 | 
			
		||||
            "security issue, since it puts security overides in place which "
 | 
			
		||||
            "are meant to be only used during development. This "
 | 
			
		||||
            "also means that paperless will tell anyone various "
 | 
			
		||||
            "debugging information when something goes wrong.")]
 | 
			
		||||
    else:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
@ -257,6 +257,14 @@ LOGGING = {
 | 
			
		||||
            "handlers": ["dbhandler", "streamhandler"],
 | 
			
		||||
            "level": "DEBUG"
 | 
			
		||||
        },
 | 
			
		||||
        "paperless_mail": {
 | 
			
		||||
            "handlers": ["dbhandler", "streamhandler"],
 | 
			
		||||
            "level": "DEBUG"
 | 
			
		||||
        },
 | 
			
		||||
        "paperless_tesseract": {
 | 
			
		||||
            "handlers": ["dbhandler", "streamhandler"],
 | 
			
		||||
            "level": "DEBUG"
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -312,6 +320,8 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
 | 
			
		||||
 | 
			
		||||
CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")
 | 
			
		||||
 | 
			
		||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 | 
			
		||||
 | 
			
		||||
# The default language that tesseract will attempt to use when parsing
 | 
			
		||||
# documents.  It should be a 3-letter language code consistent with ISO 639.
 | 
			
		||||
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")
 | 
			
		||||
 | 
			
		||||
@ -1,18 +1,7 @@
 | 
			
		||||
from django.contrib import admin
 | 
			
		||||
from django import forms
 | 
			
		||||
 | 
			
		||||
from paperless_mail.models import MailAccount, MailRule
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MailAccountForm(forms.ModelForm):
 | 
			
		||||
 | 
			
		||||
    password = forms.CharField(widget=forms.PasswordInput)
 | 
			
		||||
 | 
			
		||||
    class Meta:
 | 
			
		||||
        fields = '__all__'
 | 
			
		||||
        model = MailAccount
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MailAccountAdmin(admin.ModelAdmin):
 | 
			
		||||
 | 
			
		||||
    list_display = ("name", "imap_server", "username")
 | 
			
		||||
@ -20,6 +9,8 @@ class MailAccountAdmin(admin.ModelAdmin):
 | 
			
		||||
 | 
			
		||||
class MailRuleAdmin(admin.ModelAdmin):
 | 
			
		||||
 | 
			
		||||
    list_filter = ("account",)
 | 
			
		||||
 | 
			
		||||
    list_display = ("name", "account", "folder", "action")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,7 @@ from django_q.tasks import async_task
 | 
			
		||||
from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
 | 
			
		||||
    MailboxFolderSelectError
 | 
			
		||||
 | 
			
		||||
from documents.loggers import LoggingMixin
 | 
			
		||||
from documents.models import Correspondent
 | 
			
		||||
from paperless_mail.models import MailAccount, MailRule
 | 
			
		||||
 | 
			
		||||
@ -83,72 +84,6 @@ def make_criterias(rule):
 | 
			
		||||
    return {**criterias, **get_rule_action(rule).get_criteria()}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def handle_mail_account(account):
 | 
			
		||||
 | 
			
		||||
    if account.imap_security == MailAccount.IMAP_SECURITY_NONE:
 | 
			
		||||
        mailbox = MailBoxUnencrypted(account.imap_server, account.imap_port)
 | 
			
		||||
    elif account.imap_security == MailAccount.IMAP_SECURITY_STARTTLS:
 | 
			
		||||
        mailbox = MailBox(account.imap_server, account.imap_port, starttls=True)
 | 
			
		||||
    elif account.imap_security == MailAccount.IMAP_SECURITY_SSL:
 | 
			
		||||
        mailbox = MailBox(account.imap_server, account.imap_port)
 | 
			
		||||
    else:
 | 
			
		||||
        raise ValueError("Unknown IMAP security")
 | 
			
		||||
 | 
			
		||||
    total_processed_files = 0
 | 
			
		||||
 | 
			
		||||
    with mailbox as M:
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            M.login(account.username, account.password)
 | 
			
		||||
        except Exception:
 | 
			
		||||
            raise MailError(
 | 
			
		||||
                f"Error while authenticating account {account.name}")
 | 
			
		||||
 | 
			
		||||
        for rule in account.rules.all():
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                M.folder.set(rule.folder)
 | 
			
		||||
            except MailboxFolderSelectError:
 | 
			
		||||
                raise MailError(
 | 
			
		||||
                    f"Rule {rule.name}: Folder {rule.folder} does not exist "
 | 
			
		||||
                    f"in account {account.name}")
 | 
			
		||||
 | 
			
		||||
            criterias = make_criterias(rule)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                messages = M.fetch(criteria=AND(**criterias), mark_seen=False)
 | 
			
		||||
            except Exception:
 | 
			
		||||
                raise MailError(
 | 
			
		||||
                    f"Rule {rule.name}: Error while fetching folder "
 | 
			
		||||
                    f"{rule.folder} of account {account.name}")
 | 
			
		||||
 | 
			
		||||
            post_consume_messages = []
 | 
			
		||||
 | 
			
		||||
            for message in messages:
 | 
			
		||||
                try:
 | 
			
		||||
                    processed_files = handle_message(message, rule)
 | 
			
		||||
                except Exception:
 | 
			
		||||
                    raise MailError(
 | 
			
		||||
                        f"Rule {rule.name}: Error while processing mail "
 | 
			
		||||
                        f"{message.uid} of account {account.name}")
 | 
			
		||||
                if processed_files > 0:
 | 
			
		||||
                    post_consume_messages.append(message.uid)
 | 
			
		||||
 | 
			
		||||
                total_processed_files += processed_files
 | 
			
		||||
            try:
 | 
			
		||||
                get_rule_action(rule).post_consume(
 | 
			
		||||
                    M,
 | 
			
		||||
                    post_consume_messages,
 | 
			
		||||
                    rule.action_parameter)
 | 
			
		||||
 | 
			
		||||
            except Exception:
 | 
			
		||||
                raise MailError(
 | 
			
		||||
                    f"Rule {rule.name}: Error while processing post-consume "
 | 
			
		||||
                    f"actions for account {account.name}")
 | 
			
		||||
 | 
			
		||||
    return total_processed_files
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_title(message, att, rule):
 | 
			
		||||
    if rule.assign_title_from == MailRule.TITLE_FROM_SUBJECT:
 | 
			
		||||
        title = message.subject
 | 
			
		||||
@ -189,10 +124,121 @@ def get_correspondent(message, rule):
 | 
			
		||||
    return correspondent
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def handle_message(message, rule):
 | 
			
		||||
def get_mailbox(server, port, security):
    """Build the mailbox object matching an account's IMAP security setting.

    :param server: IMAP server, passed through to the mailbox constructor.
    :param port: IMAP port, passed through to the mailbox constructor.
    :param security: one of the ``MailAccount.IMAP_SECURITY_*`` constants.
    :return: a ``MailBoxUnencrypted`` for ``IMAP_SECURITY_NONE``, otherwise a
        ``MailBox`` (created with ``starttls=True`` for
        ``IMAP_SECURITY_STARTTLS``).
    :raises ValueError: if ``security`` is none of the three known constants.
    """
    # Each known security mode returns immediately; anything that falls
    # through all three checks is an unsupported value.
    if security == MailAccount.IMAP_SECURITY_NONE:
        return MailBoxUnencrypted(server, port)

    if security == MailAccount.IMAP_SECURITY_STARTTLS:
        return MailBox(server, port, starttls=True)

    if security == MailAccount.IMAP_SECURITY_SSL:
        return MailBox(server, port)

    raise ValueError("Unknown IMAP security")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MailAccountHandler(LoggingMixin):
 | 
			
		||||
 | 
			
		||||
    def handle_mail_account(self, account):
        """Run every rule of one mail account and return the file count.

        Opens the account's mailbox and logs in. Then, rule by rule, it
        selects the rule's folder, fetches the mails matching the rule's
        criteria, hands each mail to ``handle_message`` and finally applies
        the rule's post-consume action to the mails for which
        ``handle_message`` returned a value greater than zero.

        :param account: the mail account to process. Its ``imap_server``,
            ``imap_port``, ``imap_security``, ``username``, ``password``,
            ``name`` and ``rules`` are read here.
        :return: the sum of the values returned by ``handle_message`` over
            all rules of the account.
        :raises MailError: if logging in, selecting a folder, starting the
            fetch, handling a mail, or the post-consume action fails.
        """
        self.renew_logging_group()
        self.log('debug', f"Processing mail account {account}")

        total_processed_files = 0

        mailbox_context = get_mailbox(
            account.imap_server,
            account.imap_port,
            account.imap_security)

        with mailbox_context as mailbox:
            try:
                mailbox.login(account.username, account.password)
            except Exception:
                raise MailError(
                    f"Error while authenticating account {account.name}")

            self.log('debug', f"Account {account}: Processing "
                              f"{account.rules.count()} rule(s)")

            for rule in account.rules.all():
                self.log(
                    'debug',
                    f"Account {account}: Processing rule {rule.name}")
                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Selecting folder {rule.folder}")

                # Step 1: switch to the folder this rule operates on.
                try:
                    mailbox.folder.set(rule.folder)
                except MailboxFolderSelectError:
                    raise MailError(
                        f"Rule {rule.name}: Folder {rule.folder} does not exist "
                        f"in account {account.name}")

                # Step 2: search the folder with the rule's criteria.
                criterias = make_criterias(rule)
                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Searching folder with criteria "
                    f"{str(AND(**criterias))}")

                try:
                    messages = mailbox.fetch(
                        criteria=AND(**criterias),
                        mark_seen=False)
                except Exception:
                    raise MailError(
                        f"Rule {rule.name}: Error while fetching folder "
                        f"{rule.folder} of account {account.name}")

                # Step 3: handle every matching mail. Only the UIDs of mails
                # that yielded at least one file are kept for the
                # post-consume action.
                post_consume_messages = []
                mails_processed = 0

                for message in messages:
                    try:
                        file_count = self.handle_message(message, rule)
                    except Exception:
                        raise MailError(
                            f"Rule {rule.name}: Error while processing mail "
                            f"{message.uid} of account {account.name}")

                    if file_count > 0:
                        post_consume_messages.append(message.uid)

                    total_processed_files += file_count
                    mails_processed += 1

                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Processed {mails_processed} "
                    f"matching mail(s)")
                self.log(
                    'debug',
                    f"Rule {account}.{rule}: Running mail actions on "
                    f"{len(post_consume_messages)} mails")

                # Step 4: apply the rule's action to the collected mails.
                try:
                    rule_action = get_rule_action(rule)
                    rule_action.post_consume(
                        mailbox,
                        post_consume_messages,
                        rule.action_parameter)
                except Exception:
                    raise MailError(
                        f"Rule {rule.name}: Error while processing post-consume "
                        f"actions for account {account.name}")

        return total_processed_files
 | 
			
		||||
 | 
			
		||||
    def handle_message(self, message, rule):
 | 
			
		||||
        if not message.attachments:
 | 
			
		||||
            return 0
 | 
			
		||||
 | 
			
		||||
        self.log(
 | 
			
		||||
            'debug',
 | 
			
		||||
            f"Rule {rule.account}.{rule}: "
 | 
			
		||||
            f"Processing mail {message.subject} from {message.from_} with "
 | 
			
		||||
            f"{len(message.attachments)} attachment(s)")
 | 
			
		||||
 | 
			
		||||
        correspondent = get_correspondent(message, rule)
 | 
			
		||||
        tag = rule.assign_tag
 | 
			
		||||
        doc_type = rule.assign_document_type
 | 
			
		||||
@ -211,6 +257,12 @@ def handle_message(message, rule):
 | 
			
		||||
                with open(temp_filename, 'wb') as f:
 | 
			
		||||
                    f.write(att.payload)
 | 
			
		||||
 | 
			
		||||
                self.log(
 | 
			
		||||
                    'info',
 | 
			
		||||
                    f"Rule {rule.account}.{rule}: "
 | 
			
		||||
                    f"Consuming attachment {att.filename} from mail "
 | 
			
		||||
                    f"{message.subject} from {message.from_}")
 | 
			
		||||
 | 
			
		||||
                async_task(
 | 
			
		||||
                    "documents.tasks.consume_file",
 | 
			
		||||
                    path=temp_filename,
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,6 @@
 | 
			
		||||
from django.core.management.base import BaseCommand
 | 
			
		||||
 | 
			
		||||
from paperless_mail import mail, tasks
 | 
			
		||||
from paperless_mail import tasks
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Command(BaseCommand):
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										23
									
								
								src/paperless_mail/migrations/0003_auto_20201118_1940.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								src/paperless_mail/migrations/0003_auto_20201118_1940.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,23 @@
 | 
			
		||||
# Generated by Django 3.1.3 on 2020-11-18 19:40
 | 
			
		||||
 | 
			
		||||
from django.db import migrations, models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Migration(migrations.Migration):
    """Alter ``MailAccount.imap_port`` and ``MailRule.name``.

    ``imap_port`` gains a help text; ``name`` on ``mailrule`` becomes unique.
    """

    # This migration builds on the previous paperless_mail migration.
    dependencies = [('paperless_mail', '0002_auto_20201117_1334')]

    operations = [
        # Same nullable integer field as before, now with a help text.
        migrations.AlterField(
            model_name='mailaccount',
            name='imap_port',
            field=models.IntegerField(
                blank=True,
                null=True,
                help_text='This is usually 143 for unencrypted and STARTTLS connections, and 993 for SSL connections.',
            ),
        ),
        # Rule names must now be unique.
        migrations.AlterField(
            model_name='mailrule',
            name='name',
            field=models.CharField(max_length=256, unique=True),
        ),
    ]
 | 
			
		||||
@ -1,8 +1,5 @@
 | 
			
		||||
from django.db import models
 | 
			
		||||
 | 
			
		||||
# Create your models here.
 | 
			
		||||
from django.db import models
 | 
			
		||||
 | 
			
		||||
import documents.models as document_models
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -22,7 +19,11 @@ class MailAccount(models.Model):
 | 
			
		||||
 | 
			
		||||
    imap_server = models.CharField(max_length=256)
 | 
			
		||||
 | 
			
		||||
    imap_port = models.IntegerField(blank=True, null=True)
 | 
			
		||||
    imap_port = models.IntegerField(
 | 
			
		||||
        blank=True,
 | 
			
		||||
        null=True,
 | 
			
		||||
        help_text="This is usually 143 for unencrypted and STARTTLS "
 | 
			
		||||
                  "connections, and 993 for SSL connections.")
 | 
			
		||||
 | 
			
		||||
    imap_security = models.PositiveIntegerField(
 | 
			
		||||
        choices=IMAP_SECURITY_OPTIONS,
 | 
			
		||||
@ -71,7 +72,7 @@ class MailRule(models.Model):
 | 
			
		||||
        (CORRESPONDENT_FROM_CUSTOM, "Use correspondent selected below")
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    name = models.CharField(max_length=256)
 | 
			
		||||
    name = models.CharField(max_length=256, unique=True)
 | 
			
		||||
 | 
			
		||||
    account = models.ForeignKey(
 | 
			
		||||
        MailAccount,
 | 
			
		||||
 | 
			
		||||
@ -1,13 +1,13 @@
 | 
			
		||||
import logging
 | 
			
		||||
 | 
			
		||||
from paperless_mail import mail
 | 
			
		||||
from paperless_mail.mail import MailAccountHandler
 | 
			
		||||
from paperless_mail.models import MailAccount
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process_mail_accounts():
 | 
			
		||||
    total_new_documents = 0
 | 
			
		||||
    for account in MailAccount.objects.all():
 | 
			
		||||
        total_new_documents += mail.handle_mail_account(account)
 | 
			
		||||
        total_new_documents += MailAccountHandler().handle_mail_account(account)
 | 
			
		||||
 | 
			
		||||
    if total_new_documents > 0:
 | 
			
		||||
        return f"Added {total_new_documents} document(s)."
 | 
			
		||||
@ -18,6 +18,6 @@ def process_mail_accounts():
 | 
			
		||||
def process_mail_account(name):
    """Process the single mail account that has the given name.

    Looks up the ``MailAccount`` whose ``name`` equals ``name`` and passes it
    to a new ``MailAccountHandler``. If there is no such account, an error is
    logged and nothing else is done.

    :param name: name of the mail account to process.
    :return: ``None``.
    """
    # Django managers provide no ``find()``; calling it raised AttributeError
    # before any account could be processed. ``filter(...).first()`` returns
    # the matching account, or ``None`` when there is none, which keeps the
    # if/else below working as intended.
    account = MailAccount.objects.filter(name=name).first()

    if account:
        # The module-level ``mail.handle_mail_account`` has been replaced by
        # the ``MailAccountHandler`` class, so only the handler is called.
        MailAccountHandler().handle_mail_account(account)
    else:
        logging.error("Unknown mail account: {}".format(name))
 | 
			
		||||
 | 
			
		||||
@ -7,7 +7,7 @@ from django.test import TestCase
 | 
			
		||||
from imap_tools import MailMessageFlags, MailboxFolderSelectError
 | 
			
		||||
 | 
			
		||||
from documents.models import Correspondent
 | 
			
		||||
from paperless_mail.mail import get_correspondent, get_title, handle_message, handle_mail_account, MailError
 | 
			
		||||
from paperless_mail.mail import MailError, MailAccountHandler, get_correspondent, get_title
 | 
			
		||||
from paperless_mail.models import MailRule, MailAccount
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -126,6 +126,8 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        self.reset_bogus_mailbox()
 | 
			
		||||
 | 
			
		||||
        self.mail_account_handler = MailAccountHandler()
 | 
			
		||||
 | 
			
		||||
    def reset_bogus_mailbox(self):
 | 
			
		||||
        self.bogus_mailbox.messages = []
 | 
			
		||||
        self.bogus_mailbox.messages_spam = []
 | 
			
		||||
@ -145,10 +147,10 @@ class TestMail(TestCase):
 | 
			
		||||
        me_localhost = Correspondent.objects.create(name=message2.from_)
 | 
			
		||||
        someone_else = Correspondent.objects.create(name="someone else")
 | 
			
		||||
 | 
			
		||||
        rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
 | 
			
		||||
        rule = MailRule(name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING)
 | 
			
		||||
        self.assertIsNone(get_correspondent(message, rule))
 | 
			
		||||
 | 
			
		||||
        rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
 | 
			
		||||
        rule = MailRule(name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL)
 | 
			
		||||
        c = get_correspondent(message, rule)
 | 
			
		||||
        self.assertIsNotNone(c)
 | 
			
		||||
        self.assertEqual(c.name, "someone@somewhere.com")
 | 
			
		||||
@ -157,7 +159,7 @@ class TestMail(TestCase):
 | 
			
		||||
        self.assertEqual(c.name, "me@localhost.com")
 | 
			
		||||
        self.assertEqual(c.id, me_localhost.id)
 | 
			
		||||
 | 
			
		||||
        rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
 | 
			
		||||
        rule = MailRule(name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME)
 | 
			
		||||
        c = get_correspondent(message, rule)
 | 
			
		||||
        self.assertIsNotNone(c)
 | 
			
		||||
        self.assertEqual(c.name, "Someone!")
 | 
			
		||||
@ -165,7 +167,7 @@ class TestMail(TestCase):
 | 
			
		||||
        self.assertIsNotNone(c)
 | 
			
		||||
        self.assertEqual(c.id, me_localhost.id)
 | 
			
		||||
 | 
			
		||||
        rule = MailRule(assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
 | 
			
		||||
        rule = MailRule(name="d", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_CUSTOM, assign_correspondent=someone_else)
 | 
			
		||||
        c = get_correspondent(message, rule)
 | 
			
		||||
        self.assertEqual(c, someone_else)
 | 
			
		||||
 | 
			
		||||
@ -174,14 +176,15 @@ class TestMail(TestCase):
 | 
			
		||||
        message.subject = "the message title"
 | 
			
		||||
        att = namedtuple('Attachment', [])
 | 
			
		||||
        att.filename = "this_is_the_file.pdf"
 | 
			
		||||
        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME)
 | 
			
		||||
        rule = MailRule(name="a", assign_title_from=MailRule.TITLE_FROM_FILENAME)
 | 
			
		||||
        self.assertEqual(get_title(message, att, rule), "this_is_the_file")
 | 
			
		||||
        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_SUBJECT)
 | 
			
		||||
        rule = MailRule(name="b", assign_title_from=MailRule.TITLE_FROM_SUBJECT)
 | 
			
		||||
        self.assertEqual(get_title(message, att, rule), "the message title")
 | 
			
		||||
 | 
			
		||||
    def test_handle_message(self):
 | 
			
		||||
        message = namedtuple('MailMessage', [])
 | 
			
		||||
        message.subject = "the message title"
 | 
			
		||||
        message.from_ = "Myself"
 | 
			
		||||
 | 
			
		||||
        att = namedtuple('Attachment', [])
 | 
			
		||||
        att.filename = "test1.pdf"
 | 
			
		||||
@ -200,9 +203,10 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        message.attachments = [att, att2, att3]
 | 
			
		||||
 | 
			
		||||
        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME)
 | 
			
		||||
        account = MailAccount()
 | 
			
		||||
        rule = MailRule(assign_title_from=MailRule.TITLE_FROM_FILENAME, account=account)
 | 
			
		||||
 | 
			
		||||
        result = handle_message(message, rule)
 | 
			
		||||
        result = self.mail_account_handler.handle_message(message, rule)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(result, 2)
 | 
			
		||||
 | 
			
		||||
@ -224,7 +228,7 @@ class TestMail(TestCase):
 | 
			
		||||
        message.attachments = []
 | 
			
		||||
        rule = MailRule()
 | 
			
		||||
 | 
			
		||||
        result = handle_message(message, rule)
 | 
			
		||||
        result = self.mail_account_handler.handle_message(message, rule)
 | 
			
		||||
 | 
			
		||||
        self.assertFalse(m.called)
 | 
			
		||||
        self.assertEqual(result, 0)
 | 
			
		||||
@ -235,11 +239,13 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MARK_READ)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 0)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 2)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 2)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.fetch("UNSEEN", False)), 0)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
 | 
			
		||||
    def test_handle_mail_account_delete(self):
 | 
			
		||||
 | 
			
		||||
@ -249,7 +255,7 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 0)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 2)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 1)
 | 
			
		||||
 | 
			
		||||
@ -258,11 +264,13 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_FLAG, filter_subject="Invoice")
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 0)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 2)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 1)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.fetch("UNFLAGGED", False)), 1)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
 | 
			
		||||
    def test_handle_mail_account_move(self):
 | 
			
		||||
        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="secret")
 | 
			
		||||
@ -272,7 +280,7 @@ class TestMail(TestCase):
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 0)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages_spam), 0)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 1)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages_spam), 1)
 | 
			
		||||
@ -281,7 +289,7 @@ class TestMail(TestCase):
 | 
			
		||||
        account = MailAccount.objects.create(name="test", imap_server="", username="admin", password="wrong")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            handle_mail_account(account)
 | 
			
		||||
            self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        except MailError as e:
 | 
			
		||||
            self.assertTrue(str(e).startswith("Error while authenticating account"))
 | 
			
		||||
        else:
 | 
			
		||||
@ -291,7 +299,7 @@ class TestMail(TestCase):
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule", account=account, folder="uuuh")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            handle_mail_account(account)
 | 
			
		||||
            self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        except MailError as e:
 | 
			
		||||
            self.assertTrue("uuuh does not exist" in str(e))
 | 
			
		||||
        else:
 | 
			
		||||
@ -299,10 +307,10 @@ class TestMail(TestCase):
 | 
			
		||||
 | 
			
		||||
        account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
 | 
			
		||||
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule2", account=account, action=MailRule.ACTION_MOVE, action_parameter="doesnotexist", filter_subject="Claim")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            handle_mail_account(account)
 | 
			
		||||
            self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        except MailError as e:
 | 
			
		||||
            self.assertTrue("Error while processing post-consume actions" in str(e))
 | 
			
		||||
        else:
 | 
			
		||||
@ -311,12 +319,12 @@ class TestMail(TestCase):
 | 
			
		||||
    def test_filters(self):
 | 
			
		||||
 | 
			
		||||
        account = MailAccount.objects.create(name="test3", imap_server="", username="admin", password="secret")
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")
 | 
			
		||||
        rule = MailRule.objects.create(name="testrule3", account=account, action=MailRule.ACTION_DELETE, filter_subject="Claim")
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 0)
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 1)
 | 
			
		||||
 | 
			
		||||
@ -326,7 +334,7 @@ class TestMail(TestCase):
 | 
			
		||||
        rule.filter_body = "electronic"
 | 
			
		||||
        rule.save()
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 2)
 | 
			
		||||
 | 
			
		||||
@ -336,7 +344,7 @@ class TestMail(TestCase):
 | 
			
		||||
        rule.filter_body = None
 | 
			
		||||
        rule.save()
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 1)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 4)
 | 
			
		||||
 | 
			
		||||
@ -347,6 +355,6 @@ class TestMail(TestCase):
 | 
			
		||||
        rule.filter_subject = "Invoice"
 | 
			
		||||
        rule.save()
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 3)
 | 
			
		||||
        handle_mail_account(account)
 | 
			
		||||
        self.mail_account_handler.handle_mail_account(account)
 | 
			
		||||
        self.assertEqual(len(self.bogus_mailbox.messages), 2)
 | 
			
		||||
        self.assertEqual(self.async_task.call_count, 5)
 | 
			
		||||
 | 
			
		||||
@ -1,3 +0,0 @@
 | 
			
		||||
from django.shortcuts import render
 | 
			
		||||
 | 
			
		||||
# Create your views here.
 | 
			
		||||
@ -86,7 +86,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
            return self._text
 | 
			
		||||
 | 
			
		||||
        if not settings.OCR_ALWAYS and self._is_ocred():
 | 
			
		||||
            self.log("info", "Skipping OCR, using Text from PDF")
 | 
			
		||||
            self.log("debug", "Skipping OCR, using Text from PDF")
 | 
			
		||||
            self._text = get_text_from_pdf(self.document_path)
 | 
			
		||||
            return self._text
 | 
			
		||||
 | 
			
		||||
@ -98,7 +98,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
        try:
 | 
			
		||||
 | 
			
		||||
            sample_page_index = int(len(images) / 2)
 | 
			
		||||
            self.log("info", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
 | 
			
		||||
            self.log("debug", "Attempting language detection on page {} of {}...".format(sample_page_index + 1, len(images)))
 | 
			
		||||
            sample_page_text = self._ocr([images[sample_page_index]], settings.OCR_LANGUAGE)[0]
 | 
			
		||||
            guessed_language = self._guess_language(sample_page_text)
 | 
			
		||||
 | 
			
		||||
@ -107,7 +107,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
 | 
			
		||||
 | 
			
		||||
            elif ISO639[guessed_language] == settings.OCR_LANGUAGE:
 | 
			
		||||
                self.log("info", "Detected language: {} (default language)".format(guessed_language))
 | 
			
		||||
                self.log("debug", "Detected language: {} (default language)".format(guessed_language))
 | 
			
		||||
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
 | 
			
		||||
 | 
			
		||||
            elif not ISO639[guessed_language] in pyocr.get_available_tools()[0].get_available_languages():
 | 
			
		||||
@ -115,10 +115,10 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
                ocr_pages = self._complete_ocr_default_language(images, sample_page_index, sample_page_text)
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                self.log("info", "Detected language: {}".format(guessed_language))
 | 
			
		||||
                self.log("debug", "Detected language: {}".format(guessed_language))
 | 
			
		||||
                ocr_pages = self._ocr(images, ISO639[guessed_language])
 | 
			
		||||
 | 
			
		||||
            self.log("info", "OCR completed.")
 | 
			
		||||
            self.log("debug", "OCR completed.")
 | 
			
		||||
            self._text = strip_excess_whitespace(" ".join(ocr_pages))
 | 
			
		||||
            return self._text
 | 
			
		||||
 | 
			
		||||
@ -130,7 +130,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
        Greyscale images are easier for Tesseract to OCR
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        self.log("info", "Converting document {} into greyscale images...".format(self.document_path))
 | 
			
		||||
        self.log("debug", "Converting document {} into greyscale images...".format(self.document_path))
 | 
			
		||||
 | 
			
		||||
        # Convert PDF to multiple PNMs
 | 
			
		||||
        pnm = os.path.join(self.tempdir, "convert-%04d.pnm")
 | 
			
		||||
@ -148,7 +148,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
            if f.endswith(".pnm"):
 | 
			
		||||
                pnms.append(os.path.join(self.tempdir, f))
 | 
			
		||||
 | 
			
		||||
        self.log("info", "Running unpaper on {} pages...".format(len(pnms)))
 | 
			
		||||
        self.log("debug", "Running unpaper on {} pages...".format(len(pnms)))
 | 
			
		||||
 | 
			
		||||
        # Run unpaper in parallel on converted images
 | 
			
		||||
        with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
 | 
			
		||||
@ -161,11 +161,11 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
            guess = langdetect.detect(text)
 | 
			
		||||
            return guess
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            self.log('debug', "Language detection failed with: {}".format(e))
 | 
			
		||||
            self.log('warning', "Language detection failed with: {}".format(e))
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def _ocr(self, imgs, lang):
 | 
			
		||||
        self.log("info", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
 | 
			
		||||
        self.log("debug", "Performing OCR on {} page(s) with language {}".format(len(imgs), lang))
 | 
			
		||||
        with ThreadPool(processes=settings.THREADS_PER_WORKER) as pool:
 | 
			
		||||
            r = pool.map(image_to_string, itertools.product(imgs, [lang]))
 | 
			
		||||
            return r
 | 
			
		||||
@ -180,7 +180,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
        images_copy = list(images)
 | 
			
		||||
        del images_copy[sample_page_index]
 | 
			
		||||
        if images_copy:
 | 
			
		||||
            self.log('info', 'Continuing ocr with default language.')
 | 
			
		||||
            self.log('debug', 'Continuing ocr with default language.')
 | 
			
		||||
            ocr_pages = self._ocr(images_copy, settings.OCR_LANGUAGE)
 | 
			
		||||
            ocr_pages.insert(sample_page_index, sample_page)
 | 
			
		||||
            return ocr_pages
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user