Skip to content

Commit

Permalink
Merge pull request #65 from dh-tech/feature/5-date-comparisons
Browse files Browse the repository at this point in the history
preliminary date comparison methods
  • Loading branch information
rlskoeser authored Apr 25, 2024
2 parents ab378da + fcdd4c2 commit 1cef86b
Show file tree
Hide file tree
Showing 3 changed files with 257 additions and 19 deletions.
111 changes: 96 additions & 15 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from calendar import monthrange
from enum import Enum, auto
from enum import IntEnum
import re

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
Expand All @@ -15,16 +15,22 @@
ONE_DAY = datetime.timedelta(days=1)


class DatePrecision(Enum):
class DatePrecision(IntEnum):
"""date precision, to indicate date precision independent from how much
of the date is known."""

#: year
YEAR = auto()
#: month
MONTH = auto()
# numbers should be set to allow logical greater than / less than
# comparison, e.g. year precision > month

#: day
DAY = auto()
DAY = 1
#: month
MONTH = 2
#: year
YEAR = 3

def __str__(self):
return f"{self.name}"


class Undate:
Expand Down Expand Up @@ -173,18 +179,93 @@ def __repr__(self) -> str:
return "<Undate '%s' (%s)>" % (self.label, self)
return "<Undate %s>" % self

def __eq__(self, other: "Undate") -> bool:
# question: should label be taken into account when checking equality?
# for now, assuming label differences don't matter for comparing dates
return (
def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
# Note: assumes label differences don't matter for comparing dates

# only a day-precision fully known undate can be equal to a datetime.date
if isinstance(other, datetime.date):
return self.earliest == other and self.latest == other

# check for apparent equality
looks_equal = (
self.earliest == other.earliest
and self.latest == other.latest
# NOTE: assumes that partially known values can only be written
# in one format (i.e. X for missing digits).
# If we support other formats, will need to normalize to common
# internal format for comparison
and self.initial_values == other.initial_values
)
# if everything looks the same, check for any unknowns in initial values
# the same unknown date should NOT be considered equal

# NOTE: assumes that partially known values can only be written
# in one format (i.e. X for missing digits).
# If we support other formats, will need to normalize to common
# internal format for comparison
if looks_equal and any("X" in str(val) for val in self.initial_values.values()):
return False
return looks_equal

def __lt__(self, other: Union["Undate", datetime.date]) -> bool:
    """Return True when this date ends before ``other`` begins.

    ``other`` may be an :class:`Undate` or a :class:`datetime.date`; a
    ``datetime.date`` is converted with :meth:`Undate.from_datetime_date`
    before comparing. Any other operand type yields ``NotImplemented`` so
    that Python can try the reflected operation or raise ``TypeError``.

    :raises NotImplementedError: when one date falls within the other,
        since that ordering is ambiguous and not currently supported.
    """
    # support datetime.date by converting to undate
    if isinstance(other, datetime.date):
        other = Undate.from_datetime_date(other)
    elif not isinstance(other, Undate):
        # not a type we know how to order against; defer to Python's
        # standard handling rather than failing on a missing attribute
        return NotImplemented

    # if this date ends before the other date starts,
    # this date is earlier, so it is less
    if self.latest < other.earliest:
        return True

    # if the other one ends before this one starts,
    # this date is later, so it is not less
    if other.latest < self.earliest:
        return False

    # the ranges overlap: check if one is included within the other
    # (e.g., single date within the same year);
    # comparison for those cases is not currently supported
    if other in self or self in other:
        # NOTE: unsupported comparisons are supposed to return NotImplemented.
        # However, doing that for two Undate objects results in a confusing
        # TypeError ('<' not supported between instances of 'Undate' and
        # 'Undate'). How to handle an ambiguous / indeterminate comparison?
        # We may need a tribool / ternary type (true, false, unknown), but
        # it is unclear what python builtin methods will do with it.
        raise NotImplementedError(
            "Can't compare when one date falls within the other"
        )

    # for any other case (i.e., self == other), return false
    return False

def __gt__(self, other: Union["Undate", datetime.date]) -> bool:
    """Return True when this date is strictly later than ``other``.

    Defined explicitly so that ``>`` works against :class:`datetime.date`,
    while reusing the existing ``<`` and ``==`` implementations.
    """
    # anything that sorts before the other value cannot be greater than it
    if self < other:
        return False
    # strictly greater than must also rule out equality
    return not (self == other)

def __le__(self, other: Union["Undate", datetime.date]) -> bool:
    """Return True when this date is equal to or earlier than ``other``."""
    # equality is checked first; only fall back to the ordering
    # comparison when the two values are not equal
    if self == other:
        return True
    return self < other

def __contains__(self, other: Union["Undate", datetime.date]) -> bool:
    """Return True when ``other`` falls within the range of this date.

    A :class:`datetime.date` is converted to an :class:`Undate` first.
    Two dates that compare equal are never treated as containing each other.
    """
    # support comparison with datetime by converting to undate
    if isinstance(other, datetime.date):
        other = Undate.from_datetime_date(other)

    # strictly equal dates: neither one contains the other
    if self == other:
        return False

    # the other date must start no earlier than this one...
    if not self.earliest <= other.earliest:
        return False
    # ...and end no later than this one
    if not self.latest >= other.latest:
        return False

    # is precision sufficient for comparing partially known dates?
    return self.precision > other.precision

@staticmethod
def from_datetime_date(dt_date):
    """Build an :class:`Undate` from the year, month, and day of a
    :class:`datetime.date`."""
    year, month, day = dt_date.year, dt_date.month, dt_date.day
    return Undate(year, month, day)

@property
def known_year(self) -> bool:
Expand Down
7 changes: 5 additions & 2 deletions tests/test_dateformat/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ def test_parse_to_string(self):
BaseDateFormat().to_string(1991)


@pytest.mark.first
def test_import_formatters_import_only_once(caplog):
# run first so we can confirm it runs once
# clear the cache, since any instantiation of an Undate
# object anywhere in the test suite will populate it
BaseDateFormat.import_formatters.cache_clear()

# run first, and confirm it runs and loads formatters
with caplog.at_level(logging.DEBUG):
import_count = BaseDateFormat.import_formatters()
# should import at least one thing (iso8601)
Expand Down
158 changes: 156 additions & 2 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from datetime import timedelta
from datetime import timedelta, date

import pytest

from undate.undate import Undate, UndateInterval
from undate.undate import Undate, UndateInterval, DatePrecision


class TestDatePrecision:
    """Tests for the :class:`DatePrecision` enum."""

    def test_str(self):
        # the string form of a member is its bare name
        label = str(DatePrecision.YEAR)
        assert label == "YEAR"


class TestUndate:
Expand Down Expand Up @@ -121,12 +126,27 @@ def test_invalid_date(self):
with pytest.raises(ValueError):
Undate(1990, 22)

def test_from_datetime_date(self):
    # converting a datetime.date should yield an equivalent Undate
    source = date(2001, 3, 5)
    converted = Undate.from_datetime_date(source)
    assert isinstance(converted, Undate)
    assert converted == Undate(2001, 3, 5)

def test_eq(self):
    # each case is (positional args, keyword args); two dates built
    # separately from the same known values must compare equal
    cases = [
        ((2022,), {}),
        ((2022, 10), {}),
        ((2022, 10, 1), {}),
        ((), {"month": 2, "day": 7}),
    ]
    for args, kwargs in cases:
        assert Undate(*args, **kwargs) == Undate(*args, **kwargs)

def test_eq_datetime_date(self):
    # support comparisons with datetime objects for full day-precision
    oct_first = date(2022, 10, 1)
    assert Undate(2022, 10, 1) == oct_first

    unequal_pairs = [
        # same precision, different day or year
        (Undate(2022, 10, 1), date(2022, 10, 2)),
        (Undate(1980, 10, 1), oct_first),
        # other date precisions are not equal
        (Undate(2022), oct_first),
        (Undate(2022, 10), oct_first),
    ]
    for undate, dt_date in unequal_pairs:
        assert undate != dt_date

def test_not_eq(self):
assert Undate(2022) != Undate(2023)
assert Undate(2022, 10) != Undate(2022, 11)
Expand All @@ -135,6 +155,140 @@ def test_not_eq(self):
assert Undate(2022) != Undate(2022, 10)
assert Undate(2022, 10) != Undate(2022, 10, 1)

# partially unknown dates should NOT be considered equal
assert Undate("19XX") != Undate("19XX")
assert Undate(1980, "XX") != Undate(1980, "XX")

# Shared parametrize data for ordering tests: every tuple is
# (earlier, later), where the first value is unambiguously before the second.
testdata_lt_gt = [
# dates to test for gt/lt comparison: earlier date, later date
# - simple cases: same precision where one date is clearly earlier
(Undate(2022), Undate(2023)),
(Undate(1991, 1), Undate(1991, 5)),
(Undate(1856, 3, 3), Undate(1856, 3, 21)),
# - mixed precision where one date is clearly earlier
(Undate(1991, 1), Undate(2000)),
(Undate(1856, 3, 3), Undate(1901)),
# - partially known digits where comparison is possible
(Undate("19XX"), Undate("20XX")),
(Undate(1900, "0X"), Undate(1900, "1X")),
# - compare with datetime.date objects (always on the right-hand side here)
(Undate("19XX"), date(2020, 1, 1)),
(Undate(1991, 1), date(1992, 3, 4)),
]

# Strict ordering must hold in both directions for every (earlier, later) pair.
@pytest.mark.parametrize("earlier,later", testdata_lt_gt)
def test_lt(self, earlier, later):
# less than, from the earlier value's point of view
assert earlier < later
# and the mirrored greater than, from the later value's point of view
assert later > earlier

# Data for <= / >= tests: start from a copy of the strict-ordering pairs
# (copied so that the extend below does not modify testdata_lt_gt)
testdata_lte_gte = testdata_lt_gt.copy()
# add a few exactly equal cases
testdata_lte_gte.extend(
[
(Undate(1601), Undate(1601)),
(Undate(1991, 1), Undate(1991, 1)),
(Undate(1492, 5, 3), Undate(1492, 5, 3)),
# compare with datetime.date also
(Undate(1492, 5, 3), date(1492, 5, 3)),
]
)

def test_lt_when_eq(self):
    # strict less than / greater than should return false when equal;
    # checked against another Undate and against a datetime.date
    equal_pairs = [
        (Undate(1900), Undate(1900)),
        (Undate(1903, 1, 5), date(1903, 1, 5)),
    ]
    for left, right in equal_pairs:
        assert not left < right
        assert not left > right

# Non-strict ordering must hold for the strictly ordered pairs
# as well as for the exactly equal pairs.
@pytest.mark.parametrize("earlier,later", testdata_lte_gte)
def test_lte(self, earlier, later):
# less than or equal, from the earlier value's point of view
assert earlier <= later
# and the mirrored greater than or equal
assert later >= earlier

def test_lt_notimplemented(self):
    # how to compare mixed precision where dates overlap?
    # if one date falls *within* the earliest/latest of the other,
    # then it is not clearly less; for now this is not implemented
    whole_year = Undate(2022)
    month_in_year = Undate(2022, 5)

    # the error should be raised whichever side the containing date is on
    for left, right in ((whole_year, month_in_year), (month_in_year, whole_year)):
        with pytest.raises(
            NotImplementedError, match="date falls within the other"
        ):
            assert left < right

# Parametrize data for test_contains: every tuple is (date1, date2),
# and the test asserts ``date1 in date2``.
testdata_contains = [
# first date falls within the range of the other
# dates within range: middle, start, end, varying precision
(Undate(2022, 6), Undate(2022)),
(Undate(2022, 1, 1), Undate(2022)),
(Undate(2022, 12, 31), Undate(2022)),
(Undate(2022, 6, 15), Undate(2022, 6)),
# support contains with datetime.date
(date(2022, 6, 1), Undate(2022)),
(date(2022, 6, 1), Undate(2022, 6)),
]

# The ``in`` operator should report date1 as falling within date2
# for every pair in testdata_contains.
@pytest.mark.parametrize("date1,date2", testdata_contains)
def test_contains(self, date1, date2):
assert date1 in date2

# Parametrize data for test_not_contains: every tuple is (date1, date2),
# and the test asserts ``date1 not in date2``.
testdata_not_contains = [
# dates not in range
(Undate(1980), Undate(2020)),
(Undate(1980), Undate(2020, 6)),
(Undate(1980, 6), Undate(2020, 6)),
# support contains with datetime.date
(date(1980, 6, 1), Undate(2022)),
(date(3001, 6, 1), Undate(2022, 6)),
# partially known dates that look similar but have the same precision,
# so one does not contain the other
(Undate("199X"), Undate("19XX")),
# - specific month to unknown month
(Undate(1980, 6), Undate(1980, "XX")),
# some of these might overlap, but we don't have enough
# information to determine
# - unknown month to unknown month
(Undate(1980, "XX"), Undate(1980, "XX")),
# - partially unknown month to unknown month
(Undate(1801, "1X"), Undate(1801, "XX")),
]

# The ``in`` operator should report that date1 does not fall within date2
# for every pair in testdata_not_contains.
@pytest.mark.parametrize("date1,date2", testdata_not_contains)
def test_not_contains(self, date1, date2):
assert date1 not in date2

def test_sorting(self):
    # sorting should be possible based on gt/lt

    # simple cases: list the dates in the expected order, then
    # sort the reversed sequence and check we get that order back
    d1980 = Undate(1980)
    expected_simple = [
        d1980,
        Undate(2002, 10),
        Undate(2002, 12),
        Undate(2012, 5, 1),
    ]
    assert sorted(reversed(expected_simple)) == expected_simple

    # what about semi-ambiguous cases?
    d1991_XX = Undate(1991, "XX")
    expected_partial = [d1980, d1991_XX, Undate(1992, 1, "XX")]
    assert sorted(reversed(expected_partial)) == expected_partial

    # what about things we can't compare?
    d1991 = Undate(1991)
    d1991_02 = Undate(1991, 2)
    overlapping = [d1991_02, d1991, d1991_XX]
    # for now, this will raise a not implemented error
    with pytest.raises(NotImplementedError):
        sorted(overlapping)

    # TODO: partially known year?
    # someyear = Undate("1XXX")
    # assert sorted([d1991, someyear]) == [someyear, d1991]

def test_duration(self):
day_duration = Undate(2022, 11, 7).duration()
assert isinstance(day_duration, timedelta)
Expand Down

0 comments on commit 1cef86b

Please sign in to comment.