From 9a6ebf2a1119d888038fdecb4401df97b5d4caa7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 7 Dec 2023 09:37:39 +0100
Subject: [PATCH] temp

Temporary patch with two changes:

* cpp/src/arrow/dataset/file_parquet_test.cc: extend
  PredicatePushdownRowGroupFragments with a filter that applies 'less' to the
  "list" field. The test expects SplitByRowGroup to report NotImplemented for
  that filter, and calls CountRowGroupsInFragment with the same filter
  expecting all row groups.
* python/pyarrow/_dataset_parquet.pyx: pass filter.unwrap() to
  SubsetWithFilter instead of the result of _bind(filter, schema).

---
 cpp/src/arrow/dataset/file_parquet_test.cc | 13 +++++++++++++
 python/pyarrow/_dataset_parquet.pyx        |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 76cd0af3b835f..efd311107256e 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -661,6 +661,19 @@ TEST_P(TestParquetFileFormatScan, PredicatePushdownRowGroupFragments) {
                            less(field_ref(FieldRef("struct", "i32")), literal(6)));
   CountRowGroupsInFragment(fragment, {1},
                            equal(field_ref(FieldRef("struct", "str")), literal("2")));
+
+  // unsupported combination of field type and kernel gives an error
+  auto filter = less(field_ref("list"), literal(6));
+  auto parquet_fragment = checked_pointer_cast<ParquetFileFragment>(fragment);
+  Status filter_status = parquet_fragment->SplitByRowGroup(filter).status();
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      NotImplemented,
+      testing::HasSubstr("Function 'less' has no kernel matching"),
+      filter_status);
+
+  // filter on unsupported type (list) will return all RowGroups
+  CountRowGroupsInFragment(fragment, all_row_groups, less(field_ref("list"), literal(6)));
+
 }
 
 TEST_P(TestParquetFileFormatScan, ExplicitRowGroupSelection) {
diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx
index f83b78d9336b8..2acd724a56376 100644
--- a/python/pyarrow/_dataset_parquet.pyx
+++ b/python/pyarrow/_dataset_parquet.pyx
@@ -471,7 +471,7 @@ cdef class ParquetFileFragment(FileFragment):
 
         if filter is not None:
             schema = schema or self.physical_schema
-            c_filter = _bind(filter, schema)
+            c_filter = filter.unwrap() #_bind(filter, schema)
             with nogil:
                 c_fragment = move(GetResultValue(
                     self.parquet_file_fragment.SubsetWithFilter(