58 lines
1.4 KiB
Python
58 lines
1.4 KiB
Python
import logging
|
|
|
|
from django.conf import settings
|
|
from django_q.tasks import async_task, result
|
|
from whoosh.writing import AsyncWriter
|
|
|
|
from documents import index
|
|
from documents.classifier import DocumentClassifier, \
|
|
IncompatibleClassifierVersionError
|
|
from documents.mail import MailFetcher
|
|
from documents.models import Document
|
|
|
|
|
|
def consume_mail():
    """Construct a ``MailFetcher`` and invoke its ``pull()`` method."""
    fetcher = MailFetcher()
    fetcher.pull()
|
|
|
|
|
|
def index_optimize():
    """Open the document index and call ``optimize()`` on it."""
    search_index = index.open_index()
    search_index.optimize()
|
|
|
|
|
|
def index_reindex():
    """Rebuild the document index from all ``Document`` objects.

    The index is opened with ``recreate=True`` and every document returned
    by ``Document.objects.all()`` is passed to ``index.update_document``
    through a single ``AsyncWriter`` context.
    """
    all_documents = Document.objects.all()
    search_index = index.open_index(recreate=True)

    with AsyncWriter(search_index) as index_writer:
        for doc in all_documents:
            index.update_document(index_writer, doc)
|
|
|
|
|
|
def train_classifier():
    """Train the document classifier and save it when training changed it.

    A previously saved classifier is reloaded first. If that fails with
    ``FileNotFoundError`` or ``IncompatibleClassifierVersionError``, the
    failure is logged at debug level and training continues with a newly
    constructed ``DocumentClassifier``.

    ``classifier.train()`` is then called; a truthy result causes the
    classifier to be saved, a falsy result is logged as "unchanged".

    Any exception raised while training or saving is logged (including its
    traceback) and is not re-raised, so this function never propagates
    classifier errors to its caller.
    """
    logger = logging.getLogger(__name__)
    classifier = DocumentClassifier()

    try:
        # load the classifier, since we might not have to train it again.
        classifier.reload()
    except (FileNotFoundError, IncompatibleClassifierVersionError) as e:
        # Not fatal: there is simply no usable saved classifier. Record why
        # instead of swallowing the error silently.
        logger.debug(
            "Could not reload classifier ({}: {}), "
            "continuing with a new one.".format(type(e).__name__, e)
        )
        # Start over with a fresh instance so nothing a failed reload() may
        # have partially loaded can leak into training.
        # NOTE(review): whether reload() can leave partial state behind is
        # not visible from here - confirm against DocumentClassifier.
        classifier = DocumentClassifier()

    try:
        if classifier.train():
            logger.info(
                "Saving updated classifier model to {}...".format(
                    settings.MODEL_FILE)
            )
            classifier.save_classifier()
        else:
            logger.debug(
                "Training data unchanged."
            )
    except Exception as e:
        # Deliberate catch-all at the task boundary: report the failure
        # instead of raising. logger.exception keeps the same message text
        # but also attaches the traceback.
        logger.exception(
            "Classifier error: " + str(e)
        )
|