Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into apachegh-43519-clea…
Browse files Browse the repository at this point in the history
…nup-requirements
  • Loading branch information
jorisvandenbossche committed Oct 2, 2024
2 parents fa65a63 + 3abfc0e commit 6eea81c
Show file tree
Hide file tree
Showing 207 changed files with 6,444 additions and 1,023 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ALMALINUX=8
ALPINE_LINUX=3.16
DEBIAN=12
FEDORA=39
UBUNTU=20.04
UBUNTU=22.04

# Default versions for various dependencies
CLANG_TOOLS=14
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ jobs:
matrix:
include:
- architecture: AMD64
macos-version: "12"
macos-version: "13"
- architecture: ARM64
macos-version: "14"
env:
Expand Down Expand Up @@ -460,7 +460,7 @@ jobs:
mkdir -p /usr/local/bin
wget \
--output-document /usr/local/bin/minio.exe \
https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z
chmod +x /usr/local/bin/minio.exe
- name: Set up Python
uses: actions/[email protected]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/csharp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ jobs:

macos:
name: AMD64 macOS 13 C# ${{ matrix.dotnet }}
runs-on: macos-13 # Pending https://github.com/pythonnet/pythonnet/issues/2396
runs-on: macos-13
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 15
strategy:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ jobs:
run: archery docker push ${{ matrix.image }}

macos:
name: AMD64 macOS 12 Java JDK ${{ matrix.jdk }}
runs-on: macos-12
name: AMD64 macOS 13 Java JDK ${{ matrix.jdk }}
runs-on: macos-13
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 30
strategy:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ jobs:
run: archery docker push debian-js

macos:
name: AMD64 macOS 12 NodeJS ${{ matrix.node }}
runs-on: macos-12
name: AMD64 macOS 13 NodeJS ${{ matrix.node }}
runs-on: macos-13
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 30
strategy:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/matlab.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ jobs:
strategy:
matrix:
include:
- architecture: AMD64
macos-version: "12"
- architecture: ARM64
macos-version: "14"
- architecture: AMD64
macos-version: "13"
- architecture: ARM64
macos-version: "14"
steps:
- name: Check out repository
uses: actions/checkout@v4
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ jobs:
python: "3.11"
env:
PYTHON: ${{ matrix.python || 3.9 }}
UBUNTU: ${{ matrix.ubuntu || 20.04 }}
UBUNTU: ${{ matrix.ubuntu || 22.04 }}
PANDAS: ${{ matrix.pandas || 'latest' }}
NUMPY: ${{ matrix.numpy || 'latest' }}
steps:
Expand Down Expand Up @@ -143,7 +143,7 @@ jobs:
matrix:
include:
- architecture: AMD64
macos-version: "12"
macos-version: "13"
- architecture: ARM64
macos-version: "14"
env:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
run: archery docker push ubuntu-ruby

macos:
name: AMD64 macOS 14 GLib & Ruby
name: ARM64 macOS 14 GLib & Ruby
runs-on: macos-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 60
Expand Down
16 changes: 8 additions & 8 deletions ci/appveyor-cpp-setup.bat
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,15 @@ conda config --append disallowed_packages pypy3
conda info -a

@rem
@rem Install mamba to the base environment
@rem Install Python to the base environment
@rem
conda install -q -y -c conda-forge mamba python=%PYTHON% || exit /B
conda install -q -y -c conda-forge python=%PYTHON% || exit /B

@rem Can't use conda-libmamba-solver 2.0.0
conda config --set solver classic

@rem Update for newer CA certificates
mamba update -q -y -c conda-forge --all || exit /B
conda update -q -y -c conda-forge --all || exit /B

@rem
@rem Create conda environment
Expand All @@ -66,11 +69,8 @@ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt
@rem Force conda to use conda-forge
conda config --add channels conda-forge
conda config --remove channels defaults
@rem Ensure using the latest information. If there are invalid caches,
@rem mamba may use invalid download URL.
mamba clean --all -y
@rem Arrow conda environment
mamba create -n arrow -y -c conda-forge ^
conda create -n arrow -y -c conda-forge ^
--file=ci\conda_env_python.txt ^
%CONDA_PACKAGES% ^
"ccache" ^
Expand All @@ -94,7 +94,7 @@ set CXX=cl.exe
@rem Download Minio somewhere on PATH, for unit tests
@rem
if "%ARROW_S3%" == "ON" (
appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z -FileName C:\Windows\Minio.exe || exit /B
appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z -FileName C:\Windows\Minio.exe || exit /B
)


Expand Down
4 changes: 2 additions & 2 deletions ci/docker/conda.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# install conda and mamba via mambaforge
# install conda and mamba via miniforge
COPY ci/scripts/install_conda.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_conda.sh mambaforge latest /opt/conda
RUN /arrow/ci/scripts/install_conda.sh miniforge3 latest /opt/conda
ENV PATH=/opt/conda/bin:$PATH

# create a conda environment
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/python-wheel-windows-test-vs2019.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin"
# 2. Install Minio for S3 testing.
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python* && \
curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z \
curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z \
--output "C:\Windows\Minio.exe"

# Install the GCS testbench using a well-known Python version.
Expand Down
7 changes: 7 additions & 0 deletions ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ RUN apt-get update -y -q && \
bzip2 \
ca-certificates \
ccache \
ceph \
ceph-fuse \
ceph-mds \
cmake \
curl \
gdb \
Expand All @@ -91,6 +94,7 @@ RUN apt-get update -y -q && \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
libradospp-dev \
libre2-dev \
librtmp-dev \
libsnappy-dev \
Expand All @@ -112,10 +116,13 @@ RUN apt-get update -y -q && \
protobuf-compiler-grpc \
python3-dev \
python3-pip \
python3-rados \
python3-venv \
rados-objclass-dev \
rapidjson-dev \
rsync \
tzdata \
uuid-runtime \
wget \
xz-utils && \
apt-get clean && \
Expand Down
7 changes: 7 additions & 0 deletions ci/docker/ubuntu-24.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ RUN apt-get update -y -q && \
autoconf \
ca-certificates \
ccache \
ceph \
ceph-fuse \
ceph-mds \
cmake \
curl \
gdb \
Expand All @@ -91,6 +94,7 @@ RUN apt-get update -y -q && \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
libradospp-dev \
libre2-dev \
librtmp-dev \
libsnappy-dev \
Expand All @@ -112,11 +116,14 @@ RUN apt-get update -y -q && \
protobuf-compiler-grpc \
python3-dev \
python3-pip \
python3-rados \
python3-venv \
rados-objclass-dev \
rapidjson-dev \
rsync \
tzdata \
tzdata-legacy \
uuid-runtime \
wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*
Expand Down
8 changes: 7 additions & 1 deletion ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ if [ ! -z "${CONDA_PREFIX}" ] && [ "${ARROW_EMSCRIPTEN:-OFF}" = "OFF" ]; then
echo -e "===\n=== Conda environment for build\n==="
conda list

export ARROW_CMAKE_ARGS="${ARROW_CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}"
if [ -n "${AR}" ]; then
ARROW_CMAKE_ARGS+=" -DCMAKE_AR=${AR}"
fi
if [ -n "${RANLIB}" ]; then
ARROW_CMAKE_ARGS+=" -DCMAKE_RANLIB=${RANLIB}"
fi
export ARROW_CMAKE_ARGS
export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';')
elif [ -x "$(command -v xcrun)" ]; then
export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)"
Expand Down
9 changes: 6 additions & 3 deletions ci/scripts/install_conda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
set -e

if [ "$#" -ne 3 ]; then
echo "Usage: $0 <installer: miniforge or mambaforge> <version> <prefix>"
echo "Usage: $0 <installer: miniforge3> <version> <prefix>"
exit 1
fi

Expand All @@ -30,8 +30,11 @@ installer=$1
version=$2
prefix=$3

echo "Downloading Miniconda installer..."
wget -nv https://github.com/conda-forge/miniforge/releases/latest/download/${installer^}-${platform}-${arch}.sh -O /tmp/installer.sh
download_url=https://github.com/conda-forge/miniforge/releases/latest/download/${installer^}-${platform}-${arch}.sh

echo "Downloading Miniconda installer from ${download_url} ..."

wget -nv ${download_url} -O /tmp/installer.sh
bash /tmp/installer.sh -b -p ${prefix}
rm /tmp/installer.sh

Expand Down
4 changes: 2 additions & 2 deletions ci/scripts/install_minio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ if [ "${version}" != "latest" ]; then
fi

# Use specific versions for minio server and client to avoid CI failures on new releases.
minio_version="minio.RELEASE.2022-05-26T05-48-41Z"
mc_version="mc.RELEASE.2022-05-09T04-08-26Z"
minio_version="minio.RELEASE.2024-09-13T20-26-02Z"
mc_version="mc.RELEASE.2024-09-16T17-43-14Z"

download()
{
Expand Down
2 changes: 1 addition & 1 deletion ci/scripts/nanoarrow_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ set -x
mkdir -p ${build_dir}
pushd ${build_dir}

cmake ${source_dir} -DNANOARROW_BUILD_INTEGRATION_TESTS=ON
cmake ${source_dir} -DNANOARROW_IPC=ON -DNANOARROW_BUILD_INTEGRATION_TESTS=ON
cmake --build .

popd
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}")
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")

set(ARROW_LLVM_VERSIONS
"19.1"
"18.1"
"17.0"
"16.0"
Expand Down
6 changes: 5 additions & 1 deletion cpp/build-support/cpplint.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,11 @@
# Hardware specific headers
'arm_neon.h',
'emmintrin.h',
'xmmintin.h',
'immintrin.h',
'intrin.h',
'nmmintrin.h',
'x86intrin.h',
'xmmintrin.h',
])

# Folders of C libraries so commonly used in C++,
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,7 @@ add_arrow_test(sparse_tensor_test)
add_arrow_test(stl_test SOURCES stl_iterator_test.cc stl_test.cc)

add_arrow_benchmark(builder_benchmark)
add_arrow_benchmark(chunk_resolver_benchmark)
add_arrow_benchmark(compare_benchmark)
add_arrow_benchmark(memory_pool_benchmark)
add_arrow_benchmark(type_benchmark)
Expand Down
12 changes: 2 additions & 10 deletions cpp/src/arrow/acero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,6 @@ add_custom_target(arrow_acero)

arrow_install_all_headers("arrow/acero")

macro(append_acero_runtime_avx2_src SRC)
if(ARROW_HAVE_RUNTIME_AVX2)
list(APPEND ARROW_ACERO_SRCS ${SRC})
set_source_files_properties(${SRC} PROPERTIES SKIP_PRECOMPILE_HEADERS ON)
set_source_files_properties(${SRC} PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG})
endif()
endmacro()

set(ARROW_ACERO_SRCS
accumulation_queue.cc
scalar_aggregate_node.cc
Expand Down Expand Up @@ -58,8 +50,8 @@ set(ARROW_ACERO_SRCS
union_node.cc
util.cc)

append_acero_runtime_avx2_src(bloom_filter_avx2.cc)
append_acero_runtime_avx2_src(swiss_join_avx2.cc)
append_runtime_avx2_src(ARROW_ACERO_SRCS bloom_filter_avx2.cc)
append_runtime_avx2_src(ARROW_ACERO_SRCS swiss_join_avx2.cc)

set(ARROW_ACERO_SHARED_LINK_LIBS)
set(ARROW_ACERO_SHARED_PRIVATE_LINK_LIBS)
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/acero/bloom_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
// under the License.

#include "arrow/acero/bloom_filter.h"

#include <random>
#include "arrow/acero/util.h" // PREFETCH

#include "arrow/util/bit_util.h" // Log2
#include "arrow/util/bitmap_ops.h" // CountSetBits
#include "arrow/util/config.h"
#include "arrow/util/prefetch.h" // PREFETCH

namespace arrow {
namespace acero {
Expand Down Expand Up @@ -152,7 +154,7 @@ void BlockedBloomFilter::FindImp(int64_t num_rows, const T* hashes,
if (enable_prefetch && UsePrefetch()) {
constexpr int kPrefetchIterations = 16;
for (int64_t i = 0; i < num_rows - kPrefetchIterations; ++i) {
PREFETCH(blocks_ + block_id(hashes[i + kPrefetchIterations]));
ARROW_PREFETCH(blocks_ + block_id(hashes[i + kPrefetchIterations]));
uint64_t result = Find(hashes[i]) ? 1ULL : 0ULL;
bits |= result << (i & 63);
if ((i & 63) == 63) {
Expand Down
5 changes: 1 addition & 4 deletions cpp/src/arrow/acero/bloom_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@

#pragma once

#if defined(ARROW_HAVE_RUNTIME_AVX2)
# include <immintrin.h>
#endif

#include <atomic>
#include <cstdint>
#include <memory>
Expand All @@ -30,6 +26,7 @@
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/simd.h"

namespace arrow {
namespace acero {
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/acero/bloom_filter_avx2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,9 @@
// specific language governing permissions and limitations
// under the License.

#include <immintrin.h>

#include "arrow/acero/bloom_filter.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/simd.h"

namespace arrow {
namespace acero {
Expand Down
Loading

0 comments on commit 6eea81c

Please sign in to comment.