import datetime
import hashlib
import logging
import os
import re
import uuid

from django.conf import settings
from django.db import transaction
from django.utils import timezone

from paperless.db import GnuPG

from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
from .file_handling import generate_filename, create_source_path_directory
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
from .parsers import ParseError, get_parser_class
from .signals import (
    document_consumption_finished,
    document_consumption_started
)


class ConsumerError(Exception):
    pass


class Consumer:

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.logging_group = None

        self.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
        if settings.PASSPHRASE:
            self.storage_type = Document.STORAGE_TYPE_GPG

    @staticmethod
    def pre_check_file_exists(filename):
        if not os.path.isfile(filename):
            raise ConsumerError("Cannot consume {}: It is not a file".format(
                filename))

    @staticmethod
    def pre_check_consumption_dir():
        if not settings.CONSUMPTION_DIR:
            raise ConsumerError(
                "The CONSUMPTION_DIR settings variable does not appear to be "
                "set.")
        if not os.path.isdir(settings.CONSUMPTION_DIR):
            raise ConsumerError(
                "Consumption directory {} does not exist".format(
                    settings.CONSUMPTION_DIR))

    @staticmethod
    def pre_check_regex(filename):
        if not re.match(FileInfo.REGEXES["title"], filename):
            raise ConsumerError(
                "Filename {} does not seem to be safe to "
                "consume".format(filename))

    @staticmethod
    def pre_check_duplicate(filename):
        with open(filename, "rb") as f:
            checksum = hashlib.md5(f.read()).hexdigest()
        if Document.objects.filter(checksum=checksum).exists():
            if settings.CONSUMER_DELETE_DUPLICATES:
                os.unlink(filename)
            raise ConsumerError(
                "Not consuming {}: It is a duplicate.".format(filename)
            )

    @staticmethod
    def pre_check_scratch_dir():
        os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

    def log(self, level, message):
        getattr(self.logger, level)(message, extra={
            "group": self.logging_group
        })

    def try_consume_file(self,
                         filename,
                         original_filename=None,
                         force_title=None,
                         force_correspondent_id=None,
                         force_document_type_id=None,
                         force_tag_ids=None):
        """
        Return the document object if it was successfully created.
        """

        # this is for grouping logging entries for this particular file
        # together.
        self.logging_group = uuid.uuid4()

        # Make sure that preconditions for consuming the file are met.
        self.pre_check_file_exists(filename)
        self.pre_check_consumption_dir()
        self.pre_check_scratch_dir()
        self.pre_check_regex(filename)
        self.pre_check_duplicate(filename)

        self.log("info", "Consuming {}".format(filename))

        # Determine the parser class.
        parser_class = get_parser_class(original_filename or filename)
        if not parser_class:
            raise ConsumerError(
                "No parsers available for {}".format(filename))
        else:
            self.log("debug", "Parser: {}".format(parser_class.__name__))

        # Notify all listeners that we're going to do some work.
        document_consumption_started.send(
            sender=self.__class__,
            filename=filename,
            logging_group=self.logging_group
        )

        # This doesn't parse the document yet, but gives us a parser.
        document_parser = parser_class(filename, self.logging_group)

        # However, this already created working directories which we have to
        # clean up.

        # Parse the document. This may take some time.
        try:
            self.log(
                "debug",
                "Generating thumbnail for {}...".format(filename))
            thumbnail = document_parser.get_optimised_thumbnail()
            self.log("debug", "Parsing {}...".format(filename))
            text = document_parser.get_text()
            date = document_parser.get_date()
        except ParseError as e:
            document_parser.cleanup()
            raise ConsumerError(e)

        # Prepare the document classifier.

        # TODO: I don't really like to do this here, but this way we avoid
        #   reloading the classifier multiple times, since there are multiple
        #   post-consume hooks that all require the classifier.

        try:
            classifier = DocumentClassifier()
            classifier.reload()
        except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
            logging.getLogger(__name__).warning(
                "Cannot classify documents: {}.".format(e))
            classifier = None

        # now that everything is done, we can start to store the document
        # in the system. This will be a transaction and reasonably fast.
        try:
            with transaction.atomic():

                # store the document.
                document = self._store(
                    text=text,
                    doc=filename,
                    thumbnail=thumbnail,
                    date=date,
                    original_filename=original_filename,
                    force_title=force_title,
                    force_correspondent_id=force_correspondent_id,
                    force_document_type_id=force_document_type_id,
                    force_tag_ids=force_tag_ids
                )

                # If we get here, it was successful. Proceed with post-consume
                # hooks. If they fail, nothing will get changed.
                document_consumption_finished.send(
                    sender=self.__class__,
                    document=document,
                    logging_group=self.logging_group,
                    classifier=classifier
                )

                # After everything is in the database, copy the files into
                # place. If this fails, we'll also rollback the transaction.
                create_source_path_directory(document.source_path)
                self._write(document, filename, document.source_path)
                self._write(document, thumbnail, document.thumbnail_path)

                # Delete the file only if it was successfully consumed
                self.log("debug", "Deleting document {}".format(filename))
                os.unlink(filename)
        except Exception as e:
            raise ConsumerError(e)
        finally:
            document_parser.cleanup()

        self.log(
            "info",
            "Document {} consumption finished".format(document)
        )

        return document

    def _store(self, text, doc, thumbnail, date,
               original_filename=None,
               force_title=None,
               force_correspondent_id=None,
               force_document_type_id=None,
               force_tag_ids=None):

        # If someone gave us the original filename, use it instead of doc.
        file_info = FileInfo.from_path(original_filename or doc)

        stats = os.stat(doc)

        self.log("debug", "Saving record to database")

        created = file_info.created or date or timezone.make_aware(
            datetime.datetime.fromtimestamp(stats.st_mtime))

        with open(doc, "rb") as f:
            document = Document.objects.create(
                correspondent=file_info.correspondent,
                title=file_info.title,
                content=text,
                file_type=file_info.extension,
                checksum=hashlib.md5(f.read()).hexdigest(),
                created=created,
                modified=created,
                storage_type=self.storage_type
            )

        relevant_tags = set(file_info.tags)
        if relevant_tags:
            tag_names = ", ".join([t.slug for t in relevant_tags])
            self.log("debug", "Tagging with {}".format(tag_names))
            document.tags.add(*relevant_tags)

        if force_title:
            document.title = force_title

        if force_correspondent_id:
            document.correspondent = Correspondent.objects.get(
                pk=force_correspondent_id)

        if force_document_type_id:
            document.document_type = DocumentType.objects.get(
                pk=force_document_type_id)

        if force_tag_ids:
            for tag_id in force_tag_ids:
                document.tags.add(Tag.objects.get(pk=tag_id))

        document.filename = generate_filename(document)

        # We need to save the document twice, since we need the PK of the
        # document in order to create its filename above.
        document.save()

        return document

    def _write(self, document, source, target):
        # Copy the source file (or its thumbnail) into its final location,
        # encrypting with GPG if the document uses encrypted storage.
        with open(source, "rb") as read_file:
            with open(target, "wb") as write_file:
                if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
                    write_file.write(read_file.read())
                    return
                self.log("debug", "Encrypting")
                write_file.write(GnuPG.encrypted(read_file))
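
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module): the consumer is
# normally driven by the management command that watches CONSUMPTION_DIR, but
# it can also be invoked directly within a configured Django context. The
# file path and override values below are hypothetical examples.
#
#   consumer = Consumer()
#   try:
#       document = consumer.try_consume_file(
#           "/path/to/consume/invoice.pdf",
#           force_title="Invoice",     # optional metadata overrides
#           force_tag_ids=[1, 2],
#       )
#   except ConsumerError as e:
#       logging.getLogger(__name__).error("Consumption failed: %s", e)
# ---------------------------------------------------------------------------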