diff --git a/src/documents/tests/test_date_parsing.py b/src/documents/tests/test_date_parsing.py index 253095275..f923aa48a 100644 --- a/src/documents/tests/test_date_parsing.py +++ b/src/documents/tests/test_date_parsing.py @@ -2,40 +2,47 @@ import datetime from dateutil import tz from django.conf import settings -from django.test import TestCase from django.test import override_settings from documents.parsers import parse_date from documents.parsers import parse_date_generator -class TestDate(TestCase): +class TestDate: def test_date_format_1(self): text = "lorem ipsum 130218 lorem ipsum" - self.assertEqual(parse_date("", text), None) + assert parse_date("", text) is None def test_date_format_2(self): text = "lorem ipsum 2018 lorem ipsum" - self.assertEqual(parse_date("", text), None) + assert parse_date("", text) is None def test_date_format_3(self): text = "lorem ipsum 20180213 lorem ipsum" - self.assertEqual(parse_date("", text), None) + assert parse_date("", text) is None def test_date_format_4(self): text = "lorem ipsum 13.02.2018 lorem ipsum" date = parse_date("", text) - self.assertEqual( - date, - datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert date == datetime.datetime( + 2018, + 2, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_5(self): text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem ipsum" date = parse_date("", text) - self.assertEqual( - date, - datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert date == datetime.datetime( + 2018, + 2, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_6(self): @@ -50,14 +57,18 @@ class TestDate(TestCase): "BIC\n" "lorem ipsum" ) - self.assertEqual(parse_date("", text), None) + assert parse_date("", text) is None def test_date_format_7(self): text = "lorem ipsum\nMärz 2019\nlorem ipsum" date = parse_date("", text) - self.assertEqual( - date, - datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert date == datetime.datetime( + 2019, + 3, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_8(self): @@ -73,186 +84,298 @@ class TestDate(TestCase): "lorem ipsum\n" "März 2020" ) - self.assertEqual( - parse_date("", text), - datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2020, + 3, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_9(self): text = "lorem ipsum\n27. Nullmonth 2020\nMärz 2020\nlorem ipsum" - self.assertEqual( - parse_date("", text), - datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2020, + 3, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_10(self): text = "Customer Number Currency 22-MAR-2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_11(self): text = "Customer Number Currency 22 MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_12(self): text = "Customer Number Currency 22/MAR/2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_13(self): text = "Customer Number Currency 22.MAR.2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_14(self): text = "Customer Number Currency 22.MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_15(self): text = "Customer Number Currency 22.MAR.22 Credit Card 1934829304" - self.assertIsNone(parse_date("", text), None) + assert parse_date("", text) is None, None def test_date_format_16(self): text = "Customer Number Currency 22.MAR,22 Credit Card 1934829304" - self.assertIsNone(parse_date("", text), None) + assert parse_date("", text) is None, None def test_date_format_17(self): text = "Customer Number Currency 22,MAR,2022 Credit Card 1934829304" - self.assertIsNone(parse_date("", text), None) + assert parse_date("", text) is None, None def test_date_format_18(self): text = "Customer Number Currency 22 MAR,2022 Credit Card 1934829304" - self.assertIsNone(parse_date("", text), None) + assert parse_date("", text) is None, None def test_date_format_19(self): text = "Customer Number Currency 21st MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 21, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 21, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_20(self): text = "Customer Number Currency 22nd March 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_21(self): text = "Customer Number Currency 2nd MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 2, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 2, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_22(self): text = "Customer Number Currency 23rd MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 23, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 23, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_23(self): text = "Customer Number Currency 24th MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 24, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_24(self): text = "Customer Number Currency 21-MAR-2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 21, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 21, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_25(self): text = "Customer Number Currency 25TH MAR 2022 Credit Card 1934829304" - self.assertEqual( - parse_date("", text), - datetime.datetime(2022, 3, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2022, + 3, + 25, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_date_format_26(self): text = "CHASE 0 September 25, 2019 JPMorgan Chase Bank, NA. P0 Box 182051" - self.assertEqual( - parse_date("", text), - datetime.datetime(2019, 9, 25, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2019, + 9, + 25, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_crazy_date_past(self): - self.assertIsNone(parse_date("", "01-07-0590 00:00:00")) + assert parse_date("", "01-07-0590 00:00:00") is None def test_crazy_date_future(self): - self.assertIsNone(parse_date("", "01-07-2350 00:00:00")) + assert parse_date("", "01-07-2350 00:00:00") is None def test_crazy_date_with_spaces(self): - self.assertIsNone(parse_date("", "20 408000l 2475")) + assert parse_date("", "20 408000l 2475") is None def test_utf_month_names(self): - self.assertEqual( - parse_date("", "13 décembre 2023"), - datetime.datetime(2023, 12, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "13 décembre 2023") == datetime.datetime( + 2023, + 12, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "13 août 2022"), - datetime.datetime(2022, 8, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "13 août 2022") == datetime.datetime( + 2022, + 8, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "11 März 2020"), - datetime.datetime(2020, 3, 11, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "11 März 2020") == datetime.datetime( + 2020, + 3, + 11, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "17. ožujka 2018."), - datetime.datetime(2018, 3, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "17. ožujka 2018.") == datetime.datetime( + 2018, + 3, + 17, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "1. veljače 2016."), - datetime.datetime(2016, 2, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "1. veljače 2016.") == datetime.datetime( + 2016, + 2, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "15. února 1985"), - datetime.datetime(1985, 2, 15, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "15. února 1985") == datetime.datetime( + 1985, + 2, + 15, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "30. září 2011"), - datetime.datetime(2011, 9, 30, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "30. září 2011") == datetime.datetime( + 2011, + 9, + 30, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "28. května 1990"), - datetime.datetime(1990, 5, 28, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "28. května 1990") == datetime.datetime( + 1990, + 5, + 28, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "1. grudzień 1997"), - datetime.datetime(1997, 12, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "1. grudzień 1997") == datetime.datetime( + 1997, + 12, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "17 Şubat 2024"), - datetime.datetime(2024, 2, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "17 Şubat 2024") == datetime.datetime( + 2024, + 2, + 17, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "30 Ağustos 2012"), - datetime.datetime(2012, 8, 30, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "30 Ağustos 2012") == datetime.datetime( + 2012, + 8, + 30, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "17 Eylül 2000"), - datetime.datetime(2000, 9, 17, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "17 Eylül 2000") == datetime.datetime( + 2000, + 9, + 17, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - parse_date("", "5. október 1992"), - datetime.datetime(1992, 10, 5, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", "5. október 1992") == datetime.datetime( + 1992, + 10, + 5, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) def test_multiple_dates(self): @@ -260,18 +383,30 @@ class TestDate(TestCase): For example 02.02.2018, 22 July 2022 and December 2021. But not 24-12-9999 because it's in the future...""" dates = list(parse_date_generator("", text)) - self.assertEqual(len(dates), 3) - self.assertEqual( - dates[0], - datetime.datetime(2018, 2, 2, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert len(dates) == 3 + assert dates[0] == datetime.datetime( + 2018, + 2, + 2, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - dates[1], - datetime.datetime(2022, 7, 22, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert dates[1] == datetime.datetime( + 2022, + 7, + 22, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) - self.assertEqual( - dates[2], - datetime.datetime(2021, 12, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert dates[2] == datetime.datetime( + 2021, + 12, + 1, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) @override_settings(FILENAME_DATE_ORDER="YMD") @@ -285,10 +420,10 @@ class TestDate(TestCase): THEN: - Should parse the date from the filename """ - self.assertEqual( - parse_date("/tmp/Scan-2022-04-01.pdf", "No date in here"), - datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), - ) + assert parse_date( + "/tmp/Scan-2022-04-01.pdf", + "No date in here", + ) == datetime.datetime(2022, 4, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) @override_settings(FILENAME_DATE_ORDER="DMY") def test_filename_date_parse_valid_dmy(self, *args): @@ -301,10 +436,10 @@ class TestDate(TestCase): THEN: - Should parse the date from the filename """ - self.assertEqual( - parse_date("/tmp/Scan-10.01.2021.pdf", "No date in here"), - datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), - ) + assert parse_date( + "/tmp/Scan-10.01.2021.pdf", + "No date in here", + ) == datetime.datetime(2021, 1, 10, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) @override_settings(FILENAME_DATE_ORDER="YMD") def test_filename_date_parse_invalid(self, *args): @@ -317,9 +452,7 @@ class TestDate(TestCase): THEN: - No date is parsed """ - self.assertIsNone( - parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"), - ) + assert parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here") is None @override_settings( FILENAME_DATE_ORDER="YMD", @@ -338,10 +471,10 @@ class TestDate(TestCase): THEN: - Should parse the date from the content not filename """ - self.assertEqual( - parse_date("/tmp/Scan-2022-04-01.pdf", "The matching date is 24.03.2022"), - datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), - ) + assert parse_date( + "/tmp/Scan-2022-04-01.pdf", + "The matching date is 24.03.2022", + ) == datetime.datetime(2022, 3, 24, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)) @override_settings( IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)), @@ -357,9 +490,13 @@ class TestDate(TestCase): - Should parse the date non-ignored date from content """ text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem ipsum" - self.assertEqual( - parse_date("", text), - datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2018, + 2, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), ) @override_settings( @@ -379,7 +516,11 @@ class TestDate(TestCase): """ text = "lorem ipsum 190311, 20200117 and lorem 13.02.2018 lorem ipsum" - self.assertEqual( - parse_date("", text), - datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)), + assert parse_date("", text) == datetime.datetime( + 2018, + 2, + 13, + 0, + 0, + tzinfo=tz.gettz(settings.TIME_ZONE), )