diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index c16d2f9aacf74..46e4b3aa52768 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1020,7 +1020,7 @@ def test_replace_slice(): offsets = range(-3, 4) arr = pa.array([None, '', 'a', 'ab', 'abc', 'abcd', 'abcde']) - series = arr.to_pandas() + series = arr.to_pandas().astype(object).replace({np.nan: None}) for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') @@ -1031,7 +1031,7 @@ def test_replace_slice(): assert pc.binary_replace_slice(arr, start, stop, 'XX') == actual arr = pa.array([None, '', 'π', 'πb', 'πbθ', 'πbθd', 'πbθde']) - series = arr.to_pandas() + series = arr.to_pandas().astype(object).replace({np.nan: None}) for start in offsets: for stop in offsets: expected = series.str.slice_replace(start, stop, 'XX') @@ -2125,7 +2125,8 @@ def test_strftime(): for fmt in formats: options = pc.StrftimeOptions(fmt) result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime(fmt)) + # cast to the same type as result to ignore string vs large_string + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) fmt = "%Y-%m-%dT%H:%M:%S" @@ -2133,34 +2134,34 @@ def test_strftime(): # Default format tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions()) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Default format plus timezone tsa = pa.array(ts, type=pa.timestamp("s", timezone)) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z")) - expected = pa.array(ts.strftime(fmt + "%Z")) + expected = pa.array(ts.strftime(fmt + "%Z")).cast(result.type) assert result.equals(expected) # Pandas %S is equivalent to %S in arrow for unit="s" tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S")) + expected = pa.array(ts.strftime("%S")).cast(result.type) assert result.equals(expected) # Pandas %S.%f is equivalent to %S in arrow for unit="us" tsa = pa.array(ts, type=pa.timestamp("us", timezone)) options = pc.StrftimeOptions("%S") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime("%S.%f")) + expected = pa.array(ts.strftime("%S.%f")).cast(result.type) assert result.equals(expected) # Test setting locale tsa = pa.array(ts, type=pa.timestamp("s", timezone)) options = pc.StrftimeOptions(fmt, locale="C") result = pc.strftime(tsa, options=options) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) assert result.equals(expected) # Test timestamps without timezone @@ -2168,7 +2169,7 @@ def test_strftime(): ts = pd.to_datetime(times) tsa = pa.array(ts, type=pa.timestamp("s")) result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt)) - expected = pa.array(ts.strftime(fmt)) + expected = pa.array(ts.strftime(fmt)).cast(result.type) # Positional format assert pc.strftime(tsa, fmt) == result diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py index 18c8cd5b654e6..72aaf3499ee31 100644 --- a/python/pyarrow/tests/test_feather.py +++ b/python/pyarrow/tests/test_feather.py @@ -426,7 +426,11 @@ def test_empty_strings(version): @pytest.mark.pandas def test_all_none(version): df = pd.DataFrame({'all_none': [None] * 10}) - _check_pandas_roundtrip(df, version=version) + if version == 1: + expected = df.astype("str") + else: + expected = df + _check_pandas_roundtrip(df, version=version, expected=expected) @pytest.mark.pandas