From 80d5f4f97da14ab35aefc0a6dab27b726e9a5738 Mon Sep 17 00:00:00 2001 From: Fabien Dubuy <59292746+fdubuy@users.noreply.github.com> Date: Tue, 25 Jun 2024 22:46:18 +0200 Subject: [PATCH] =?UTF-8?q?removed=20tukish=20letter=20=C4=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/documents/parsers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/documents/parsers.py b/src/documents/parsers.py index 72aea0387..09b1442c0 100644 --- a/src/documents/parsers.py +++ b/src/documents/parsers.py @@ -39,11 +39,11 @@ from documents.utils import run_subprocess DATE_REGEX = re.compile( r"(\b|(?!=([_-])))(\d{1,2})[\.\/-](\d{1,2})[\.\/-](\d{4}|\d{2})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))(\d{4}|\d{2})[\.\/-](\d{1,2})[\.\/-](\d{1,2})(\b|(?=([_-])))|" - r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞığü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞığü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|" + r"(\b|(?!=([_-])))(\d{1,2}[\. ]+[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{4}|[a-zéûäëčžúřěáíóńźçŞğü]{3,9} \d{1,2}, \d{4})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{1,2}, (\d{4}))(\b|(?=([_-])))|" r"(\b|(?!=([_-])))([^\W\d_]{3,9} \d{4})(\b|(?=([_-])))|" r"(\b|(?!=([_-])))(\d{1,2}[^ ]{2}[\. ]+[^ ]{3,9}[ \.\/-]\d{4})(\b|(?=([_-])))|" - r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞığü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))", + r"(\b|(?!=([_-])))(\b\d{1,2}[ \.\/-][a-zéûäëčžúřěáíóńźçŞğü]{3}[ \.\/-]\d{4})(\b|(?=([_-])))", re.IGNORECASE, )