Further rework of this test

This commit is contained in:
Trenton H 2024-06-28 10:20:20 -07:00
parent db268386a5
commit 60d4aae1ea
2 changed files with 111 additions and 114 deletions

View File

@ -0,0 +1,9 @@
import zoneinfo
import pytest
from pytest_django.fixtures import SettingsWrapper
@pytest.fixture()
def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
    """
    Return the timezone named by the Django ``TIME_ZONE`` setting.

    The name is read from the pytest-django ``settings`` fixture, so tests can
    request this fixture rather than building the tzinfo object themselves.
    """
    return zoneinfo.ZoneInfo(settings.TIME_ZONE)

View File

@ -1,8 +1,7 @@
import datetime import datetime
from zoneinfo import ZoneInfo
from dateutil import tz from pytest_django.fixtures import SettingsWrapper
from django.conf import settings
from django.test import override_settings
from documents.parsers import parse_date from documents.parsers import parse_date
from documents.parsers import parse_date_generator from documents.parsers import parse_date_generator
@ -21,29 +20,15 @@ class TestDate:
text = "lorem ipsum 20180213 lorem ipsum" text = "lorem ipsum 20180213 lorem ipsum"
assert parse_date("", text) is None assert parse_date("", text) is None
def test_date_format_4(self): def test_date_format_4(self, settings_timezone: ZoneInfo):
text = "lorem ipsum 13.02.2018 lorem ipsum" text = "lorem ipsum 13.02.2018 lorem ipsum"
date = parse_date("", text) date = parse_date("", text)
assert date == datetime.datetime( assert date == datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
2018,
2,
13,
0,
0,
tzinfo=tz.gettz(settings.TIME_ZONE),
)
def test_date_format_5(self): def test_date_format_5(self, settings_timezone: ZoneInfo):
text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem ipsum" text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem ipsum"
date = parse_date("", text) date = parse_date("", text)
assert date == datetime.datetime( assert date == datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
2018,
2,
13,
0,
0,
tzinfo=tz.gettz(settings.TIME_ZONE),
)
def test_date_format_6(self): def test_date_format_6(self):
text = ( text = (
@ -59,19 +44,12 @@ class TestDate:
) )
assert parse_date("", text) is None assert parse_date("", text) is None
def test_date_format_7(self): def test_date_format_7(self, settings_timezone: ZoneInfo):
text = "lorem ipsum\nMärz 2019\nlorem ipsum" text = "lorem ipsum\nMärz 2019\nlorem ipsum"
date = parse_date("", text) date = parse_date("", text)
assert date == datetime.datetime( assert date == datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone)
2019,
3,
1,
0,
0,
tzinfo=tz.gettz(settings.TIME_ZONE),
)
def test_date_format_8(self): def test_date_format_8(self, settings_timezone: ZoneInfo):
text = ( text = (
"lorem ipsum\n" "lorem ipsum\n"
"Wohnort\n" "Wohnort\n"
@ -90,10 +68,10 @@ class TestDate:
1, 1,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_9(self): def test_date_format_9(self, settings_timezone: ZoneInfo):
text = "lorem ipsum\n27. Nullmonth 2020\nMärz 2020\nlorem ipsum" text = "lorem ipsum\n27. Nullmonth 2020\nMärz 2020\nlorem ipsum"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2020, 2020,
@ -101,10 +79,10 @@ class TestDate:
1, 1,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_10(self): def test_date_format_10(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22-MAR-2022 Credit Card 1934829304" text = "Customer Number Currency 22-MAR-2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -112,10 +90,10 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_11(self): def test_date_format_11(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22 MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 22 MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -123,10 +101,10 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_12(self): def test_date_format_12(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22/MAR/2022 Credit Card 1934829304" text = "Customer Number Currency 22/MAR/2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -134,10 +112,10 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_13(self): def test_date_format_13(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22.MAR.2022 Credit Card 1934829304" text = "Customer Number Currency 22.MAR.2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -145,10 +123,10 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_14(self): def test_date_format_14(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22.MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 22.MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -156,26 +134,26 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_15(self): def test_date_format_15(self):
text = "Customer Number Currency 22.MAR.22 Credit Card 1934829304" text = "Customer Number Currency 22.MAR.22 Credit Card 1934829304"
assert parse_date("", text) is None, None assert parse_date("", text) is None
def test_date_format_16(self): def test_date_format_16(self):
text = "Customer Number Currency 22.MAR,22 Credit Card 1934829304" text = "Customer Number Currency 22.MAR,22 Credit Card 1934829304"
assert parse_date("", text) is None, None assert parse_date("", text) is None
def test_date_format_17(self): def test_date_format_17(self):
text = "Customer Number Currency 22,MAR,2022 Credit Card 1934829304" text = "Customer Number Currency 22,MAR,2022 Credit Card 1934829304"
assert parse_date("", text) is None, None assert parse_date("", text) is None
def test_date_format_18(self): def test_date_format_18(self):
text = "Customer Number Currency 22 MAR,2022 Credit Card 1934829304" text = "Customer Number Currency 22 MAR,2022 Credit Card 1934829304"
assert parse_date("", text) is None, None assert parse_date("", text) is None
def test_date_format_19(self): def test_date_format_19(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 21st MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 21st MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -183,10 +161,10 @@ class TestDate:
21, 21,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_20(self): def test_date_format_20(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 22nd March 2022 Credit Card 1934829304" text = "Customer Number Currency 22nd March 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -194,10 +172,10 @@ class TestDate:
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_21(self): def test_date_format_21(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 2nd MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 2nd MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -205,10 +183,10 @@ class TestDate:
2, 2,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_22(self): def test_date_format_22(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 23rd MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 23rd MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -216,10 +194,10 @@ class TestDate:
23, 23,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_23(self): def test_date_format_23(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 24th MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 24th MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -227,10 +205,10 @@ class TestDate:
24, 24,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_24(self): def test_date_format_24(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 21-MAR-2022 Credit Card 1934829304" text = "Customer Number Currency 21-MAR-2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -238,10 +216,10 @@ class TestDate:
21, 21,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_25(self): def test_date_format_25(self, settings_timezone: ZoneInfo):
text = "Customer Number Currency 25TH MAR 2022 Credit Card 1934829304" text = "Customer Number Currency 25TH MAR 2022 Credit Card 1934829304"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2022, 2022,
@ -249,10 +227,10 @@ class TestDate:
25, 25,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_date_format_26(self): def test_date_format_26(self, settings_timezone: ZoneInfo):
text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051" text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2019, 2019,
@ -260,7 +238,7 @@ class TestDate:
25, 25,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_crazy_date_past(self): def test_crazy_date_past(self):
@ -272,14 +250,14 @@ class TestDate:
def test_crazy_date_with_spaces(self): def test_crazy_date_with_spaces(self):
assert parse_date("", "20 408000l 2475") is None assert parse_date("", "20 408000l 2475") is None
def test_utf_month_names(self): def test_utf_month_names(self, settings_timezone: ZoneInfo):
assert parse_date("", "13 décembre 2023") == datetime.datetime( assert parse_date("", "13 décembre 2023") == datetime.datetime(
2023, 2023,
12, 12,
13, 13,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "13 août 2022") == datetime.datetime( assert parse_date("", "13 août 2022") == datetime.datetime(
2022, 2022,
@ -287,7 +265,7 @@ class TestDate:
13, 13,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "11 März 2020") == datetime.datetime( assert parse_date("", "11 März 2020") == datetime.datetime(
2020, 2020,
@ -295,7 +273,7 @@ class TestDate:
11, 11,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "17. ožujka 2018.") == datetime.datetime( assert parse_date("", "17. ožujka 2018.") == datetime.datetime(
2018, 2018,
@ -303,7 +281,7 @@ class TestDate:
17, 17,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "1. veljače 2016.") == datetime.datetime( assert parse_date("", "1. veljače 2016.") == datetime.datetime(
2016, 2016,
@ -311,7 +289,7 @@ class TestDate:
1, 1,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "15. února 1985") == datetime.datetime( assert parse_date("", "15. února 1985") == datetime.datetime(
1985, 1985,
@ -319,7 +297,7 @@ class TestDate:
15, 15,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "30. září 2011") == datetime.datetime( assert parse_date("", "30. září 2011") == datetime.datetime(
2011, 2011,
@ -327,7 +305,7 @@ class TestDate:
30, 30,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "28. května 1990") == datetime.datetime( assert parse_date("", "28. května 1990") == datetime.datetime(
1990, 1990,
@ -335,7 +313,7 @@ class TestDate:
28, 28,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "1. grudzień 1997") == datetime.datetime( assert parse_date("", "1. grudzień 1997") == datetime.datetime(
1997, 1997,
@ -343,7 +321,7 @@ class TestDate:
1, 1,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "17 Şubat 2024") == datetime.datetime( assert parse_date("", "17 Şubat 2024") == datetime.datetime(
2024, 2024,
@ -351,7 +329,7 @@ class TestDate:
17, 17,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "30 Ağustos 2012") == datetime.datetime( assert parse_date("", "30 Ağustos 2012") == datetime.datetime(
2012, 2012,
@ -359,7 +337,7 @@ class TestDate:
30, 30,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "17 Eylül 2000") == datetime.datetime( assert parse_date("", "17 Eylül 2000") == datetime.datetime(
2000, 2000,
@ -367,7 +345,7 @@ class TestDate:
17, 17,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert parse_date("", "5. október 1992") == datetime.datetime( assert parse_date("", "5. október 1992") == datetime.datetime(
1992, 1992,
@ -375,30 +353,23 @@ class TestDate:
5, 5,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
def test_multiple_dates(self): def test_multiple_dates(self, settings_timezone: ZoneInfo):
text = """This text has multiple dates. text = """This text has multiple dates.
For example 02.02.2018, 22 July 2022 and December 2021. For example 02.02.2018, 22 July 2022 and December 2021.
But not 24-12-9999 because it's in the future...""" But not 24-12-9999 because it's in the future..."""
dates = list(parse_date_generator("", text)) dates = list(parse_date_generator("", text))
assert len(dates) == 3 assert len(dates) == 3
assert dates[0] == datetime.datetime( assert dates[0] == datetime.datetime(2018, 2, 2, 0, 0, tzinfo=settings_timezone)
2018,
2,
2,
0,
0,
tzinfo=tz.gettz(settings.TIME_ZONE),
)
assert dates[1] == datetime.datetime( assert dates[1] == datetime.datetime(
2022, 2022,
7, 7,
22, 22,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
assert dates[2] == datetime.datetime( assert dates[2] == datetime.datetime(
2021, 2021,
@ -406,11 +377,14 @@ class TestDate:
1, 1,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
@override_settings(FILENAME_DATE_ORDER="YMD") def test_filename_date_parse_valid_ymd(
def test_filename_date_parse_valid_ymd(self, *args): self,
settings: SettingsWrapper,
settings_timezone: ZoneInfo,
):
""" """
GIVEN: GIVEN:
- Date parsing from the filename is enabled - Date parsing from the filename is enabled
@ -420,13 +394,18 @@ class TestDate:
THEN: THEN:
- Should parse the date from the filename - Should parse the date from the filename
""" """
settings.FILENAME_DATE_ORDER = "YMD"
assert parse_date( assert parse_date(
"/tmp/Scan-2022-04-01.pdf", "/tmp/Scan-2022-04-01.pdf",
"No date in here", "No date in here",
) == datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) ) == datetime.datetime(2022, 4, 1, 0, 0, tzinfo=settings_timezone)
@override_settings(FILENAME_DATE_ORDER="DMY") def test_filename_date_parse_valid_dmy(
def test_filename_date_parse_valid_dmy(self, *args): self,
settings: SettingsWrapper,
settings_timezone: ZoneInfo,
):
""" """
GIVEN: GIVEN:
- Date parsing from the filename is enabled - Date parsing from the filename is enabled
@ -436,13 +415,13 @@ class TestDate:
THEN: THEN:
- Should parse the date from the filename - Should parse the date from the filename
""" """
settings.FILENAME_DATE_ORDER = "DMY"
assert parse_date( assert parse_date(
"/tmp/Scan-10.01.2021.pdf", "/tmp/Scan-10.01.2021.pdf",
"No date in here", "No date in here",
) == datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) ) == datetime.datetime(2021, 1, 10, 0, 0, tzinfo=settings_timezone)
@override_settings(FILENAME_DATE_ORDER="YMD") def test_filename_date_parse_invalid(self, settings: SettingsWrapper):
def test_filename_date_parse_invalid(self, *args):
""" """
GIVEN: GIVEN:
- Date parsing from the filename is enabled - Date parsing from the filename is enabled
@ -452,13 +431,14 @@ class TestDate:
THEN: THEN:
- No date is parsed - No date is parsed
""" """
settings.FILENAME_DATE_ORDER = "YMD"
assert parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") is None assert parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") is None
@override_settings( def test_filename_date_ignored_use_content(
FILENAME_DATE_ORDER="YMD", self,
IGNORE_DATES=(datetime.date(2022, 4, 1),), settings: SettingsWrapper,
) settings_timezone: ZoneInfo,
def test_filename_date_ignored_use_content(self, *args): ):
""" """
GIVEN: GIVEN:
- Date parsing from the filename is enabled - Date parsing from the filename is enabled
@ -471,15 +451,18 @@ class TestDate:
THEN: THEN:
- Should parse the date from the content not filename - Should parse the date from the content not filename
""" """
settings.FILENAME_DATE_ORDER = "YMD"
settings.IGNORE_DATES = (datetime.date(2022, 4, 1),)
assert parse_date( assert parse_date(
"/tmp/Scan-2022-04-01.pdf", "/tmp/Scan-2022-04-01.pdf",
"The matching date is 24.03.2022", "The matching date is 24.03.2022",
) == datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) ) == datetime.datetime(2022, 3, 24, 0, 0, tzinfo=settings_timezone)
@override_settings( def test_ignored_dates_default_order(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), self,
) settings: SettingsWrapper,
def test_ignored_dates_default_order(self, *args): settings_timezone: ZoneInfo,
):
""" """
GIVEN: GIVEN:
- Ignore dates have been set - Ignore dates have been set
@ -489,6 +472,7 @@ class TestDate:
THEN: THEN:
- Should parse the date non-ignored date from content - Should parse the date non-ignored date from content
""" """
settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum" text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
2018, 2018,
@ -496,14 +480,14 @@ class TestDate:
13, 13,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )
@override_settings( def test_ignored_dates_order_ymd(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), self,
DATE_ORDER="YMD", settings: SettingsWrapper,
) settings_timezone: ZoneInfo,
def test_ignored_dates_order_ymd(self, *args): ):
""" """
GIVEN: GIVEN:
- Ignore dates have been set - Ignore dates have been set
@ -514,6 +498,10 @@ class TestDate:
THEN: THEN:
- Should parse the date non-ignored date from content - Should parse the date non-ignored date from content
""" """
settings.FILENAME_DATE_ORDER = "YMD"
settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem ipsum" text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem ipsum"
assert parse_date("", text) == datetime.datetime( assert parse_date("", text) == datetime.datetime(
@ -522,5 +510,5 @@ class TestDate:
13, 13,
0, 0,
0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE), tzinfo=settings_timezone,
) )