Date parsing tests: take the expected timezone from a shared
``settings_timezone`` fixture and replace ``override_settings`` decorators
with the pytest-django ``settings`` fixture.

Note: ``test_ignored_dates_order_ymd`` sets ``DATE_ORDER`` because that is
the setting the removed decorator overrode (not ``FILENAME_DATE_ORDER``).

diff --git a/src/documents/tests/conftest.py b/src/documents/tests/conftest.py
new file mode 100644
index 000000000..aa86f6e63
--- /dev/null
+++ b/src/documents/tests/conftest.py
@@ -0,0 +1,9 @@
+import zoneinfo
+
+import pytest
+from pytest_django.fixtures import SettingsWrapper
+
+
+@pytest.fixture()
+def settings_timezone(settings: SettingsWrapper) -> zoneinfo.ZoneInfo:
+    return zoneinfo.ZoneInfo(settings.TIME_ZONE)
diff --git a/src/documents/tests/test_date_parsing.py b/src/documents/tests/test_date_parsing.py
index f923aa48a..5bb29c8fa 100644
--- a/src/documents/tests/test_date_parsing.py
+++ b/src/documents/tests/test_date_parsing.py
@@ -1,8 +1,7 @@
 import datetime
+from zoneinfo import ZoneInfo
 
-from dateutil import tz
-from django.conf import settings
-from django.test import override_settings
+from pytest_django.fixtures import SettingsWrapper
 
 from documents.parsers import parse_date
 from documents.parsers import parse_date_generator
@@ -21,29 +20,15 @@ class TestDate:
         text = "lorem ipsum 20180213 lorem ipsum"
         assert parse_date("", text) is None
 
-    def test_date_format_4(self):
+    def test_date_format_4(self, settings_timezone: ZoneInfo):
         text = "lorem ipsum 13.02.2018 lorem ipsum"
         date = parse_date("", text)
-        assert date == datetime.datetime(
-            2018,
-            2,
-            13,
-            0,
-            0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
-        )
+        assert date == datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
 
-    def test_date_format_5(self):
+    def test_date_format_5(self, settings_timezone: ZoneInfo):
         text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem ipsum"
         date = parse_date("", text)
-        assert date == datetime.datetime(
-            2018,
-            2,
-            13,
-            0,
-            0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
-        )
+        assert date == datetime.datetime(2018, 2, 13, 0, 0, tzinfo=settings_timezone)
 
     def test_date_format_6(self):
         text = (
@@ -59,19 +44,12 @@ class TestDate:
         )
         assert parse_date("", text) is None
 
-    def test_date_format_7(self):
+    def test_date_format_7(self, settings_timezone: ZoneInfo):
         text = "lorem ipsum\nMärz 2019\nlorem ipsum"
         date = parse_date("", text)
-        assert date == datetime.datetime(
-            2019,
-            3,
-            1,
-            0,
-            0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
-        )
+        assert date == datetime.datetime(2019, 3, 1, 0, 0, tzinfo=settings_timezone)
 
-    def test_date_format_8(self):
+    def test_date_format_8(self, settings_timezone: ZoneInfo):
         text = (
             "lorem ipsum\n"
             "Wohnort\n"
@@ -90,10 +68,10 @@ class TestDate:
             1,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_9(self):
+    def test_date_format_9(self, settings_timezone: ZoneInfo):
         text = "lorem ipsum\n27. Nullmonth 2020\nMärz 2020\nlorem ipsum"
         assert parse_date("", text) == datetime.datetime(
             2020,
@@ -101,10 +79,10 @@ class TestDate:
             1,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_10(self):
+    def test_date_format_10(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22-MAR-2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -112,10 +90,10 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_11(self):
+    def test_date_format_11(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22 MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -123,10 +101,10 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_12(self):
+    def test_date_format_12(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22/MAR/2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -134,10 +112,10 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_13(self):
+    def test_date_format_13(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22.MAR.2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -145,10 +123,10 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_14(self):
+    def test_date_format_14(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22.MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -156,26 +134,26 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
     def test_date_format_15(self):
         text = "Customer Number Currency 22.MAR.22 Credit Card 1934829304"
-        assert parse_date("", text) is None, None
+        assert parse_date("", text) is None
 
     def test_date_format_16(self):
         text = "Customer Number Currency 22.MAR,22 Credit Card 1934829304"
-        assert parse_date("", text) is None, None
+        assert parse_date("", text) is None
 
     def test_date_format_17(self):
         text = "Customer Number Currency 22,MAR,2022 Credit Card 1934829304"
-        assert parse_date("", text) is None, None
+        assert parse_date("", text) is None
 
     def test_date_format_18(self):
         text = "Customer Number Currency 22 MAR,2022 Credit Card 1934829304"
-        assert parse_date("", text) is None, None
+        assert parse_date("", text) is None
 
-    def test_date_format_19(self):
+    def test_date_format_19(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 21st MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -183,10 +161,10 @@ class TestDate:
             21,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_20(self):
+    def test_date_format_20(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 22nd March 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -194,10 +172,10 @@ class TestDate:
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_21(self):
+    def test_date_format_21(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 2nd MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -205,10 +183,10 @@ class TestDate:
             2,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_22(self):
+    def test_date_format_22(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 23rd MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -216,10 +194,10 @@ class TestDate:
             23,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_23(self):
+    def test_date_format_23(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 24th MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -227,10 +205,10 @@ class TestDate:
             24,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_24(self):
+    def test_date_format_24(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 21-MAR-2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -238,10 +216,10 @@ class TestDate:
             21,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_25(self):
+    def test_date_format_25(self, settings_timezone: ZoneInfo):
         text = "Customer Number Currency 25TH MAR 2022 Credit Card 1934829304"
         assert parse_date("", text) == datetime.datetime(
             2022,
@@ -249,10 +227,10 @@ class TestDate:
             25,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_date_format_26(self):
+    def test_date_format_26(self, settings_timezone: ZoneInfo):
         text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051"
         assert parse_date("", text) == datetime.datetime(
             2019,
@@ -260,7 +238,7 @@ class TestDate:
             25,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
     def test_crazy_date_past(self):
@@ -272,14 +250,14 @@ class TestDate:
     def test_crazy_date_with_spaces(self):
         assert parse_date("", "20 408000l 2475") is None
 
-    def test_utf_month_names(self):
+    def test_utf_month_names(self, settings_timezone: ZoneInfo):
         assert parse_date("", "13 décembre 2023") == datetime.datetime(
             2023,
             12,
             13,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "13 août 2022") == datetime.datetime(
             2022,
@@ -287,7 +265,7 @@ class TestDate:
             13,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "11 März 2020") == datetime.datetime(
             2020,
@@ -295,7 +273,7 @@ class TestDate:
             11,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "17. ožujka 2018.") == datetime.datetime(
             2018,
@@ -303,7 +281,7 @@ class TestDate:
             17,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "1. veljače 2016.") == datetime.datetime(
             2016,
@@ -311,7 +289,7 @@ class TestDate:
             1,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "15. února 1985") == datetime.datetime(
             1985,
@@ -319,7 +297,7 @@ class TestDate:
             15,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "30. září 2011") == datetime.datetime(
             2011,
@@ -327,7 +305,7 @@ class TestDate:
             30,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "28. května 1990") == datetime.datetime(
             1990,
@@ -335,7 +313,7 @@ class TestDate:
             28,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "1. grudzień 1997") == datetime.datetime(
             1997,
@@ -343,7 +321,7 @@ class TestDate:
             1,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "17 Şubat 2024") == datetime.datetime(
             2024,
@@ -351,7 +329,7 @@ class TestDate:
             17,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "30 Ağustos 2012") == datetime.datetime(
             2012,
@@ -359,7 +337,7 @@ class TestDate:
             30,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "17 Eylül 2000") == datetime.datetime(
             2000,
@@ -367,7 +345,7 @@ class TestDate:
             17,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert parse_date("", "5. október 1992") == datetime.datetime(
             1992,
@@ -375,30 +353,23 @@ class TestDate:
             5,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    def test_multiple_dates(self):
+    def test_multiple_dates(self, settings_timezone: ZoneInfo):
         text = """This text has multiple dates.
                   For example 02.02.2018, 22 July 2022 and December 2021.
                   But not 24-12-9999 because it's in the future..."""
         dates = list(parse_date_generator("", text))
         assert len(dates) == 3
-        assert dates[0] == datetime.datetime(
-            2018,
-            2,
-            2,
-            0,
-            0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
-        )
+        assert dates[0] == datetime.datetime(2018, 2, 2, 0, 0, tzinfo=settings_timezone)
         assert dates[1] == datetime.datetime(
             2022,
             7,
             22,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
         assert dates[2] == datetime.datetime(
             2021,
@@ -406,11 +377,14 @@ class TestDate:
             1,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    @override_settings(FILENAME_DATE_ORDER="YMD")
-    def test_filename_date_parse_valid_ymd(self, *args):
+    def test_filename_date_parse_valid_ymd(
+        self,
+        settings: SettingsWrapper,
+        settings_timezone: ZoneInfo,
+    ):
         """
         GIVEN:
             - Date parsing from the filename is enabled
@@ -420,13 +394,18 @@ class TestDate:
         THEN:
             - Should parse the date from the filename
         """
+        settings.FILENAME_DATE_ORDER = "YMD"
+
         assert parse_date(
             "/tmp/Scan-2022-04-01.pdf",
             "No date in here",
-        ) == datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE))
+        ) == datetime.datetime(2022, 4, 1, 0, 0, tzinfo=settings_timezone)
 
-    @override_settings(FILENAME_DATE_ORDER="DMY")
-    def test_filename_date_parse_valid_dmy(self, *args):
+    def test_filename_date_parse_valid_dmy(
+        self,
+        settings: SettingsWrapper,
+        settings_timezone: ZoneInfo,
+    ):
         """
         GIVEN:
             - Date parsing from the filename is enabled
@@ -436,13 +415,13 @@ class TestDate:
         THEN:
             - Should parse the date from the filename
         """
+        settings.FILENAME_DATE_ORDER = "DMY"
         assert parse_date(
             "/tmp/Scan-10.01.2021.pdf",
             "No date in here",
-        ) == datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE))
+        ) == datetime.datetime(2021, 1, 10, 0, 0, tzinfo=settings_timezone)
 
-    @override_settings(FILENAME_DATE_ORDER="YMD")
-    def test_filename_date_parse_invalid(self, *args):
+    def test_filename_date_parse_invalid(self, settings: SettingsWrapper):
         """
         GIVEN:
             - Date parsing from the filename is enabled
@@ -452,13 +431,14 @@ class TestDate:
         THEN:
             - No date is parsed
         """
+        settings.FILENAME_DATE_ORDER = "YMD"
         assert parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") is None
 
-    @override_settings(
-        FILENAME_DATE_ORDER="YMD",
-        IGNORE_DATES=(datetime.date(2022, 4, 1),),
-    )
-    def test_filename_date_ignored_use_content(self, *args):
+    def test_filename_date_ignored_use_content(
+        self,
+        settings: SettingsWrapper,
+        settings_timezone: ZoneInfo,
+    ):
         """
         GIVEN:
             - Date parsing from the filename is enabled
@@ -471,15 +451,18 @@ class TestDate:
         THEN:
             - Should parse the date from the content not filename
         """
+        settings.FILENAME_DATE_ORDER = "YMD"
+        settings.IGNORE_DATES = (datetime.date(2022, 4, 1),)
         assert parse_date(
             "/tmp/Scan-2022-04-01.pdf",
             "The matching date is 24.03.2022",
-        ) == datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE))
+        ) == datetime.datetime(2022, 3, 24, 0, 0, tzinfo=settings_timezone)
 
-    @override_settings(
-        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
-    )
-    def test_ignored_dates_default_order(self, *args):
+    def test_ignored_dates_default_order(
+        self,
+        settings: SettingsWrapper,
+        settings_timezone: ZoneInfo,
+    ):
         """
         GIVEN:
             - Ignore dates have been set
@@ -489,6 +472,7 @@ class TestDate:
         THEN:
             - Should parse the date non-ignored date from content
         """
+        settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
         text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum"
         assert parse_date("", text) == datetime.datetime(
             2018,
@@ -496,14 +480,14 @@ class TestDate:
             13,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )
 
-    @override_settings(
-        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
-        DATE_ORDER="YMD",
-    )
-    def test_ignored_dates_order_ymd(self, *args):
+    def test_ignored_dates_order_ymd(
+        self,
+        settings: SettingsWrapper,
+        settings_timezone: ZoneInfo,
+    ):
         """
         GIVEN:
             - Ignore dates have been set
@@ -514,6 +498,10 @@ class TestDate:
         THEN:
             - Should parse the date non-ignored date from content
         """
+
+        settings.DATE_ORDER = "YMD"
+        settings.IGNORE_DATES = (datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
+
         text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem ipsum"
 
         assert parse_date("", text) == datetime.datetime(
@@ -522,5 +510,5 @@ class TestDate:
             13,
             0,
             0,
-            tzinfo=tz.gettz(settings.TIME_ZONE),
+            tzinfo=settings_timezone,
         )