Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into apachegh-41884-reco…
Browse files Browse the repository at this point in the history
…rdbatchreader-cast
  • Loading branch information
jorisvandenbossche committed Jun 13, 2024
2 parents b1278d2 + 8ae1edb commit 254bdbf
Show file tree
Hide file tree
Showing 1,164 changed files with 42,061 additions and 36,268 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
/go/ @zeroshade
/java/ @lidavidm
/js/ @domoritz @trxcllnt
/matlab/ @kevingurney @kou
/matlab/ @kevingurney @kou @sgilmore10
/python/pyarrow/_flight.pyx @lidavidm
/python/pyarrow/**/*gandiva* @wjones127
/r/ @paleolimbot @thisisnic
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ __debug_bin
.envrc

# Develocity
.mvn/.gradle-enterprise/
.mvn/.develocity/
java/.mvn/.gradle-enterprise/
java/.mvn/.develocity/

# rat
filtered_rat.txt
Expand Down
20 changes: 20 additions & 0 deletions ci/scripts/python_wheel_manylinux_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,26 @@ export CMAKE_PREFIX_PATH=/tmp/arrow-dist
pushd /arrow/python
python setup.py bdist_wheel

echo "=== Strip symbols from wheel ==="
# Unpack the freshly built wheel in a scratch directory, strip debug symbols
# from the bundled shared libraries, then re-zip it under the same file name.
mkdir -p dist/temp-fix-wheel
mv dist/pyarrow-*.whl dist/temp-fix-wheel

pushd dist/temp-fix-wheel
# NOTE(review): assumes exactly one pyarrow wheel was produced by the build
# step above; with several wheels this would capture a multi-line value.
wheel_name=$(ls pyarrow-*.whl)
# Unzip and remove old wheel
unzip "${wheel_name}"
rm "${wheel_name}"
# Iterate over the globs directly instead of parsing `ls` output, so paths are
# never word-split or re-globbed.
for filename in pyarrow/*.so pyarrow/*.so.*; do
  # A pattern with no match is left as the literal pattern text; skip it.
  [ -e "${filename}" ] || continue
  echo "Stripping debug symbols from: ${filename}"
  strip --strip-debug "${filename}"
done
# Zip wheel again after stripping symbols
zip -r "${wheel_name}" .
mv "${wheel_name}" ..
popd

rm -rf dist/temp-fix-wheel

echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ==="
auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
popd
40 changes: 34 additions & 6 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4613,8 +4613,11 @@ macro(build_opentelemetry)
set(_OPENTELEMETRY_LIBS
common
http_client_curl
logs
ostream_log_record_exporter
ostream_span_exporter
otlp_http_client
otlp_http_log_record_exporter
otlp_http_exporter
otlp_recordable
proto
Expand Down Expand Up @@ -4647,6 +4650,14 @@ macro(build_opentelemetry)
set(_OPENTELEMETRY_STATIC_LIBRARY
"${OPENTELEMETRY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}opentelemetry_exporter_otlp_http${CMAKE_STATIC_LIBRARY_SUFFIX}"
)
elseif(_OPENTELEMETRY_LIB STREQUAL "otlp_http_log_record_exporter")
set(_OPENTELEMETRY_STATIC_LIBRARY
"${OPENTELEMETRY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}opentelemetry_exporter_otlp_http_log${CMAKE_STATIC_LIBRARY_SUFFIX}"
)
elseif(_OPENTELEMETRY_LIB STREQUAL "ostream_log_record_exporter")
set(_OPENTELEMETRY_STATIC_LIBRARY
"${OPENTELEMETRY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}opentelemetry_exporter_ostream_logs${CMAKE_STATIC_LIBRARY_SUFFIX}"
)
else()
set(_OPENTELEMETRY_STATIC_LIBRARY
"${OPENTELEMETRY_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}opentelemetry_${_OPENTELEMETRY_LIB}${CMAKE_STATIC_LIBRARY_SUFFIX}"
Expand Down Expand Up @@ -4681,9 +4692,16 @@ macro(build_opentelemetry)
IMPORTED_LOCATION)
list(APPEND
OPENTELEMETRY_CMAKE_ARGS
-DWITH_OTLP=ON
-DWITH_OTLP_HTTP=ON
-DWITH_OTLP_GRPC=OFF
# Disabled because it seemed to cause linking errors. May be worth a closer look.
-DWITH_FUNC_TESTS=OFF
# These options are slated for removal in v1.14 and their features are deemed stable
# as of v1.13. However, setting their corresponding ENABLE_* macros in headers seems
# finicky - resulting in build failures or ABI-related runtime errors during HTTP
# client initialization. There may still be a solution, but we disable them for now.
-DWITH_OTLP_HTTP_SSL_PREVIEW=OFF
-DWITH_OTLP_HTTP_SSL_TLS_PREVIEW=OFF
"-DProtobuf_INCLUDE_DIR=${OPENTELEMETRY_PROTOBUF_INCLUDE_DIR}"
"-DProtobuf_LIBRARY=${OPENTELEMETRY_PROTOBUF_INCLUDE_DIR}"
"-DProtobuf_PROTOC_EXECUTABLE=${OPENTELEMETRY_PROTOC_EXECUTABLE}")
Expand Down Expand Up @@ -4757,19 +4775,25 @@ macro(build_opentelemetry)
target_link_libraries(opentelemetry-cpp::resources INTERFACE opentelemetry-cpp::common)
target_link_libraries(opentelemetry-cpp::trace INTERFACE opentelemetry-cpp::common
opentelemetry-cpp::resources)
target_link_libraries(opentelemetry-cpp::logs INTERFACE opentelemetry-cpp::common
opentelemetry-cpp::resources)
target_link_libraries(opentelemetry-cpp::http_client_curl
INTERFACE opentelemetry-cpp::ext CURL::libcurl)
INTERFACE opentelemetry-cpp::common opentelemetry-cpp::ext
CURL::libcurl)
target_link_libraries(opentelemetry-cpp::proto INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
target_link_libraries(opentelemetry-cpp::otlp_recordable
INTERFACE opentelemetry-cpp::trace opentelemetry-cpp::resources
opentelemetry-cpp::proto)
INTERFACE opentelemetry-cpp::logs opentelemetry-cpp::trace
opentelemetry-cpp::resources opentelemetry-cpp::proto)
target_link_libraries(opentelemetry-cpp::otlp_http_client
INTERFACE opentelemetry-cpp::sdk opentelemetry-cpp::proto
INTERFACE opentelemetry-cpp::common opentelemetry-cpp::proto
opentelemetry-cpp::http_client_curl
nlohmann_json::nlohmann_json)
target_link_libraries(opentelemetry-cpp::otlp_http_exporter
INTERFACE opentelemetry-cpp::otlp_recordable
opentelemetry-cpp::otlp_http_client)
target_link_libraries(opentelemetry-cpp::otlp_http_log_record_exporter
INTERFACE opentelemetry-cpp::otlp_recordable
opentelemetry-cpp::otlp_http_client)

foreach(_OPENTELEMETRY_LIB ${_OPENTELEMETRY_LIBS})
add_dependencies(opentelemetry-cpp::${_OPENTELEMETRY_LIB} opentelemetry_ep)
Expand All @@ -4791,7 +4815,11 @@ if(ARROW_WITH_OPENTELEMETRY)
set(opentelemetry-cpp_SOURCE "AUTO")
resolve_dependency(opentelemetry-cpp)
set(ARROW_OPENTELEMETRY_LIBS
opentelemetry-cpp::trace opentelemetry-cpp::ostream_span_exporter
opentelemetry-cpp::trace
opentelemetry-cpp::logs
opentelemetry-cpp::otlp_http_log_record_exporter
opentelemetry-cpp::ostream_log_record_exporter
opentelemetry-cpp::ostream_span_exporter
opentelemetry-cpp::otlp_http_exporter)
get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api
INTERFACE_INCLUDE_DIRECTORIES)
Expand Down
18 changes: 18 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,7 @@ set(ARROW_UTIL_SRCS
util/int_util.cc
util/io_util.cc
util/list_util.cc
util/logger.cc
util/logging.cc
util/key_value_metadata.cc
util/memory.cc
Expand Down Expand Up @@ -627,6 +628,17 @@ if(ARROW_WITH_ZSTD)
endforeach()
endif()

# Build the telemetry sources only when OpenTelemetry support is enabled.
if(ARROW_WITH_OPENTELEMETRY)
# NOTE(review): presumably arrow_add_object_library() defines
# ARROW_TELEMETRY_TARGETS plus the ARROW_TELEMETRY_TARGET_SHARED/_STATIC
# variables used below and in add_arrow_lib(arrow ...) -- confirm against the
# macro definition.
arrow_add_object_library(ARROW_TELEMETRY telemetry/logging.cc)

# Link the OpenTelemetry libraries privately into every telemetry target.
foreach(ARROW_TELEMETRY_TARGET ${ARROW_TELEMETRY_TARGETS})
target_link_libraries(${ARROW_TELEMETRY_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS})
endforeach()
else()
# set() with no value unsets the variable, so later
# ${ARROW_TELEMETRY_TARGET_SHARED} / ${ARROW_TELEMETRY_TARGET_STATIC}
# references expand to nothing when telemetry is disabled.
set(ARROW_TELEMETRY_TARGET_SHARED)
set(ARROW_TELEMETRY_TARGET_STATIC)
endif()

set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST})
set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON)
set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static
Expand Down Expand Up @@ -1016,6 +1028,7 @@ add_arrow_lib(arrow
${ARROW_JSON_TARGET_SHARED}
${ARROW_MEMORY_POOL_TARGET_SHARED}
${ARROW_ORC_TARGET_SHARED}
${ARROW_TELEMETRY_TARGET_SHARED}
${ARROW_UTIL_TARGET_SHARED}
${ARROW_VENDORED_TARGET_SHARED}
${ARROW_SHARED_PRIVATE_LINK_LIBS}
Expand All @@ -1031,6 +1044,7 @@ add_arrow_lib(arrow
${ARROW_JSON_TARGET_STATIC}
${ARROW_MEMORY_POOL_TARGET_STATIC}
${ARROW_ORC_TARGET_STATIC}
${ARROW_TELEMETRY_TARGET_STATIC}
${ARROW_UTIL_TARGET_STATIC}
${ARROW_VENDORED_TARGET_STATIC}
${ARROW_SYSTEM_LINK_LIBS}
Expand Down Expand Up @@ -1260,6 +1274,10 @@ if(ARROW_SUBSTRAIT)
add_subdirectory(engine)
endif()

# Process the telemetry/ subdirectory only when OpenTelemetry support is
# enabled.
if(ARROW_WITH_OPENTELEMETRY)
add_subdirectory(telemetry)
endif()

if(ARROW_TENSORFLOW)
add_subdirectory(adapters/tensorflow)
endif()
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,8 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {
/// \brief Builder class for fixed-length list array value types
class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {
public:
using TypeClass = FixedSizeListType;

/// Use this constructor to define the built array's type explicitly. If value_builder
/// has indeterminate type, this builder will also.
FixedSizeListBuilder(MemoryPool* pool,
Expand Down
4 changes: 1 addition & 3 deletions cpp/src/arrow/compute/expression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -763,9 +763,7 @@ Result<Datum> ExecuteScalarExpression(const Expression& expr, const ExecBatch& i
for (size_t i = 0; i < arguments.size(); ++i) {
ARROW_ASSIGN_OR_RAISE(
arguments[i], ExecuteScalarExpression(call->arguments[i], input, exec_context));
if (arguments[i].is_array()) {
all_scalar = false;
}
all_scalar &= arguments[i].is_scalar();
}

int64_t input_length;
Expand Down
35 changes: 35 additions & 0 deletions cpp/src/arrow/compute/expression_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,41 @@ TEST(Expression, ExecuteCallWithNoArguments) {
EXPECT_EQ(actual.length(), kCount);
}

TEST(Expression, ExecuteChunkedArray) {
  // GH-41923: executing a scalar expression must produce the right result when
  // the values of the input ExecBatch are chunked arrays.
  auto pair_type = struct_({field("a", float64()), field("b", float64())});
  auto input_schema = struct_({field("a", pair_type)});

  // Three rows in total, split across two chunks (2 rows + 1 row).
  auto chunked_structs = ChunkedArrayFromJSON(input_schema, {R"([
{"a": {"a": 6.125, "b": 3.375}},
{"a": {"a": 0.0, "b": 1}}
])",
                                                             R"([
{"a": {"a": -1, "b": 4.75}}
])"});

  ASSERT_OK_AND_ASSIGN(auto table, Table::FromChunkedStructArray(chunked_structs));

  // a.a + a.b, bound against the struct type built above.
  auto lhs = field_ref(FieldRef("a", "a"));
  auto rhs = field_ref(FieldRef("a", "b"));
  auto unbound_sum = add(lhs, rhs);
  ASSERT_OK_AND_ASSIGN(auto bound_sum, unbound_sum.Bind(input_schema));

  // The batch's only value is the table's first column, a chunked array.
  std::vector<Datum> columns{table->column(0)};
  ExecBatch batch(columns, 3);

  ASSERT_OK_AND_ASSIGN(Datum res, ExecuteScalarExpression(bound_sum, batch));

  auto expected = ArrayFromJSON(float64(),
                                R"([
9.5,
1,
3.75
])");
  AssertDatumsEqual(res, expected);
}

TEST(Expression, ExecuteDictionaryTransparent) {
ExpectExecute(
equal(field_ref("a"), field_ref("b")),
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_list_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_round_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
Expand Down
Loading

0 comments on commit 254bdbf

Please sign in to comment.