Skip to content

Commit

Permalink
apacheGH-29828: [Python] Support month and day-time interval types
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Mar 20, 2024
1 parent e52017a commit 97a0953
Show file tree
Hide file tree
Showing 14 changed files with 126 additions and 4 deletions.
4 changes: 4 additions & 0 deletions docs/source/python/api/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ may expose data type-specific methods or properties.
Date64Array
TimestampArray
DurationArray
MonthIntervalArray
DayTimeIntervalArray
MonthDayNanoIntervalArray
Decimal128Array
DictionaryArray
Expand Down Expand Up @@ -131,6 +133,8 @@ classes may expose data type-specific methods or properties.
Date64Scalar
TimestampScalar
DurationScalar
MonthIntervalScalar
DayTimeIntervalScalar
MonthDayNanoIntervalScalar
Decimal128Scalar
DictionaryScalar
Expand Down
2 changes: 2 additions & 0 deletions docs/source/python/api/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ These should be used to create Arrow data types and schemas.
date32
date64
duration
month_interval
day_time_interval
month_day_nano_interval
binary
string
Expand Down
4 changes: 3 additions & 1 deletion python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def print_entry(label, value):
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
time32, time64, timestamp, date32, date64, duration,
month_day_nano_interval,
month_interval, day_time_interval, month_day_nano_interval,
float16, float32, float64,
binary, string, utf8, binary_view, string_view,
large_binary, large_string, large_utf8,
Expand Down Expand Up @@ -214,6 +214,7 @@ def print_entry(label, value):
DictionaryArray,
Date32Array, Date64Array, TimestampArray,
Time32Array, Time64Array, DurationArray,
MonthIntervalArray, DayTimeIntervalArray,
MonthDayNanoIntervalArray,
Decimal128Array, Decimal256Array, StructArray, ExtensionArray,
RunEndEncodedArray, FixedShapeTensorArray,
Expand All @@ -228,6 +229,7 @@ def print_entry(label, value):
Date32Scalar, Date64Scalar,
Time32Scalar, Time64Scalar,
TimestampScalar, DurationScalar,
MonthIntervalScalar, DayTimeIntervalScalar,
MonthDayNanoIntervalScalar,
BinaryScalar, LargeBinaryScalar, BinaryViewScalar,
StringScalar, LargeStringScalar, StringViewScalar,
Expand Down
14 changes: 14 additions & 0 deletions python/pyarrow/array.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2089,6 +2089,18 @@ cdef class DurationArray(NumericArray):
"""


cdef class MonthIntervalArray(Array):
    """
    Concrete Arrow array class holding values of the month interval
    (interval[month]) data type.
    """


cdef class DayTimeIntervalArray(Array):
    """
    Concrete Arrow array class holding values of the day-time interval
    (interval[day_time]) data type.
    """


cdef class MonthDayNanoIntervalArray(Array):
"""
Concrete class for Arrow arrays of interval[MonthDayNano] type.
Expand Down Expand Up @@ -4353,6 +4365,8 @@ cdef dict _array_classes = {
_Type_TIME32: Time32Array,
_Type_TIME64: Time64Array,
_Type_DURATION: DurationArray,
_Type_INTERVAL_MONTHS: MonthIntervalArray,
_Type_INTERVAL_DAY_TIME: DayTimeIntervalArray,
_Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalArray,
_Type_HALF_FLOAT: HalfFloatArray,
_Type_FLOAT: FloatArray,
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
_Type_TIME32" arrow::Type::TIME32"
_Type_TIME64" arrow::Type::TIME64"
_Type_DURATION" arrow::Type::DURATION"
_Type_INTERVAL_MONTHS" arrow::Type::INTERVAL_MONTHS"
_Type_INTERVAL_DAY_TIME" arrow::Type::INTERVAL_DAY_TIME"
_Type_INTERVAL_MONTH_DAY_NANO" arrow::Type::INTERVAL_MONTH_DAY_NANO"

_Type_BINARY" arrow::Type::BINARY"
Expand Down
8 changes: 8 additions & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,14 @@ cdef class ExtensionArray(Array):
pass


# Declaration only: the MonthIntervalArray implementation lives in array.pxi.
cdef class MonthIntervalArray(Array):
    pass


# Declaration only: the DayTimeIntervalArray implementation lives in array.pxi.
cdef class DayTimeIntervalArray(Array):
    pass


# Declaration only: the MonthDayNanoIntervalArray implementation lives in
# array.pxi.
cdef class MonthDayNanoIntervalArray(Array):
    pass

Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ Type_TIMESTAMP = _Type_TIMESTAMP
Type_TIME32 = _Type_TIME32
Type_TIME64 = _Type_TIME64
Type_DURATION = _Type_DURATION
Type_INTERVAL_MONTHS = _Type_INTERVAL_MONTHS
Type_INTERVAL_DAY_TIME = _Type_INTERVAL_DAY_TIME
Type_INTERVAL_MONTH_DAY_NANO = _Type_INTERVAL_MONTH_DAY_NANO
Type_BINARY = _Type_BINARY
Type_STRING = _Type_STRING
Expand Down
36 changes: 36 additions & 0 deletions python/pyarrow/scalar.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,40 @@ cdef class DurationScalar(Scalar):
return datetime.timedelta(microseconds=sp.value // 1000)


cdef class MonthIntervalScalar(Scalar):
    """
    Concrete class for month interval scalars.

    Only a textual representation is available; conversion to a Python
    object via ``as_py`` is not implemented.
    """

    def __repr__(self):
        # '<pyarrow.ClassName: value>', where value is the C++ ToString()
        # output of the wrapped scalar.
        return '<pyarrow.{}: {}>'.format(
            self.__class__.__name__, frombytes(self.wrapped.get().ToString())
        )

    def __str__(self):
        # C++ ToString() output of the wrapped scalar (e.g. "4M").
        return frombytes(self.wrapped.get().ToString())

    def as_py(self):
        """
        Conversion to a Python object is not implemented for this type.

        Raises
        ------
        NotImplementedError
            Always.
        """
        # Give callers a hint instead of a bare, message-less exception.
        raise NotImplementedError(
            "as_py() is not implemented for month interval scalars; "
            "use str() to obtain a textual representation"
        )


cdef class DayTimeIntervalScalar(Scalar):
    """
    Concrete class for day-time interval scalars.

    Only a textual representation is available; conversion to a Python
    object via ``as_py`` is not implemented.
    """

    def __repr__(self):
        # '<pyarrow.ClassName: value>', where value is the C++ ToString()
        # output of the wrapped scalar.
        return '<pyarrow.{}: {}>'.format(
            self.__class__.__name__, frombytes(self.wrapped.get().ToString())
        )

    def __str__(self):
        # C++ ToString() output of the wrapped scalar (e.g. "125d0ms").
        return frombytes(self.wrapped.get().ToString())

    def as_py(self):
        """
        Conversion to a Python object is not implemented for this type.

        Raises
        ------
        NotImplementedError
            Always.
        """
        # Give callers a hint instead of a bare, message-less exception.
        raise NotImplementedError(
            "as_py() is not implemented for day-time interval scalars; "
            "use str() to obtain a textual representation"
        )


cdef class MonthDayNanoIntervalScalar(Scalar):
"""
Concrete class for month, day, nanosecond interval scalars.
Expand Down Expand Up @@ -1124,6 +1158,8 @@ cdef dict _scalar_classes = {
_Type_RUN_END_ENCODED: RunEndEncodedScalar,
_Type_SPARSE_UNION: UnionScalar,
_Type_DENSE_UNION: UnionScalar,
_Type_INTERVAL_MONTHS: MonthIntervalScalar,
_Type_INTERVAL_DAY_TIME: DayTimeIntervalScalar,
_Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalScalar,
_Type_EXTENSION: ExtensionScalar,
}
Expand Down
2 changes: 2 additions & 0 deletions python/pyarrow/src/arrow/python/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
GET_PRIMITIVE_TYPE(LARGE_STRING, large_utf8);
GET_PRIMITIVE_TYPE(BINARY_VIEW, binary_view);
GET_PRIMITIVE_TYPE(STRING_VIEW, utf8_view);
GET_PRIMITIVE_TYPE(INTERVAL_MONTHS, month_interval);
GET_PRIMITIVE_TYPE(INTERVAL_DAY_TIME, day_time_interval);
GET_PRIMITIVE_TYPE(INTERVAL_MONTH_DAY_NANO, month_day_nano_interval);
default:
return nullptr;
Expand Down
25 changes: 25 additions & 0 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -3681,3 +3681,28 @@ def test_pairwise_diff():
with pytest.raises(pa.ArrowInvalid,
match="overflow"):
pa.compute.pairwise_diff_checked(arr, period=-1)


def test_interval_between():
    # Check that each "*_interval_between" kernel returns the dedicated
    # interval array / scalar classes (and, where pinned, the expected
    # string form of the first element).
    ts_type = pa.timestamp('s')
    start = pa.array(
        [datetime.datetime(2020, 1, 1), None, datetime.datetime(1900, 12, 13)],
        type=ts_type
    )
    end = pa.array(
        [datetime.datetime(2020, 5, 5), datetime.datetime(2020, 10, 1),
         datetime.datetime(2020, 1, 1)],
        type=ts_type
    )

    # (kernel, expected array class, expected scalar class, expected str of
    # the first element -- None when the string form is not checked)
    cases = [
        (pc.month_day_nano_interval_between,
         pa.MonthDayNanoIntervalArray, pa.MonthDayNanoIntervalScalar, None),
        (pc.month_interval_between,
         pa.MonthIntervalArray, pa.MonthIntervalScalar, "4M"),
        (pc.day_time_interval_between,
         pa.DayTimeIntervalArray, pa.DayTimeIntervalScalar, "125d0ms"),
    ]

    for kernel, array_cls, scalar_cls, expected_str in cases:
        result = kernel(start, end)
        assert isinstance(result, array_cls)
        first = result[0]
        assert isinstance(first, scalar_cls)
        if expected_str is not None:
            assert str(first) == expected_str
6 changes: 6 additions & 0 deletions python/pyarrow/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ def test_set_timezone_db_path_non_windows():
pa.Time32Array,
pa.Time64Array,
pa.DurationArray,
pa.MonthIntervalArray,
pa.DayTimeIntervalArray,
pa.MonthDayNanoIntervalArray,
pa.Decimal128Array,
pa.Decimal256Array,
pa.StructArray,
Expand All @@ -222,6 +225,9 @@ def test_set_timezone_db_path_non_windows():
pa.Time64Scalar,
pa.TimestampScalar,
pa.DurationScalar,
pa.MonthIntervalScalar,
pa.DayTimeIntervalScalar,
pa.MonthDayNanoIntervalScalar,
pa.StringScalar,
pa.BinaryScalar,
pa.FixedSizeBinaryScalar,
Expand Down
6 changes: 5 additions & 1 deletion python/pyarrow/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def get_many_types():
pa.timestamp('us', tz='UTC'),
pa.timestamp('us', tz='Europe/Paris'),
pa.duration('s'),
pa.month_interval(),
pa.day_time_interval(),
pa.month_day_nano_interval(),
pa.float16(),
pa.float32(),
pa.float64(),
Expand Down Expand Up @@ -275,7 +278,8 @@ def test_is_temporal_date_time_timestamp():
time_types = [pa.time32('s'), pa.time64('ns')]
timestamp_types = [pa.timestamp('ms')]
duration_types = [pa.duration('ms')]
interval_types = [pa.month_day_nano_interval()]
interval_types = [pa.month_interval(), pa.day_time_interval(),
pa.month_day_nano_interval()]

for case in (date_types + time_types + timestamp_types + duration_types +
interval_types):
Expand Down
14 changes: 14 additions & 0 deletions python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -4067,6 +4067,18 @@ def duration(unit):
return out


def month_interval():
    """
    Create instance of an interval type representing a number of months.

    Returns
    -------
    DataType
        The month interval data type.
    """
    return primitive_type(_Type_INTERVAL_MONTHS)


def day_time_interval():
    """
    Create instance of an interval type representing days and
    milliseconds.

    Returns
    -------
    DataType
        The day-time interval data type.
    """
    return primitive_type(_Type_INTERVAL_DAY_TIME)


def month_day_nano_interval():
"""
Create instance of an interval type representing months, days and
Expand Down Expand Up @@ -5228,6 +5240,8 @@ cdef dict _type_aliases = {
'duration[ms]': duration('ms'),
'duration[us]': duration('us'),
'duration[ns]': duration('ns'),
'month_interval': month_interval(),
'day_time_interval': day_time_interval(),
'month_day_nano_interval': month_day_nano_interval(),
}

Expand Down
5 changes: 3 additions & 2 deletions python/pyarrow/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
# Sets of Arrow type ids used by the type predicates in this module.
_DECIMAL_TYPES = {lib.Type_DECIMAL128, lib.Type_DECIMAL256}
_DATE_TYPES = {lib.Type_DATE32, lib.Type_DATE64}
_TIME_TYPES = {lib.Type_TIME32, lib.Type_TIME64}
# Single definition covering every interval type id (month, day-time and
# month-day-nano); the earlier one-member definition was dead code.
_INTERVAL_TYPES = {lib.Type_INTERVAL_MONTHS, lib.Type_INTERVAL_DAY_TIME,
                   lib.Type_INTERVAL_MONTH_DAY_NANO}
_TEMPORAL_TYPES = ({lib.Type_TIMESTAMP,
                    lib.Type_DURATION} | _TIME_TYPES | _DATE_TYPES |
                   _INTERVAL_TYPES)
Expand Down Expand Up @@ -306,7 +307,7 @@ def is_dictionary(t):

@doc(is_null, datatype="interval")
def is_interval(t):
    # Membership test against the full set of interval type ids, so month
    # and day-time intervals are recognised as well as month-day-nano.
    # (The docstring is supplied by the @doc decorator.)
    return t.id in _INTERVAL_TYPES


@doc(is_null, datatype="primitive type")
Expand Down

0 comments on commit 97a0953

Please sign in to comment.