Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into apachegh-43683-pand…
Browse files Browse the repository at this point in the history
…as-string-dtype
  • Loading branch information
jorisvandenbossche committed Jan 9, 2025
2 parents ea4cbf4 + 2b5f56c commit a59a2a2
Show file tree
Hide file tree
Showing 42 changed files with 1,085 additions and 339 deletions.
7 changes: 4 additions & 3 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ULIMIT_CORE=-1

# Default versions for platforms
ALMALINUX=8
ALPINE_LINUX=3.16
ALPINE_LINUX=3.18
DEBIAN=12
FEDORA=39
UBUNTU=22.04
Expand Down Expand Up @@ -92,10 +92,11 @@ TZ=UTC
VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release

# This must be updated when we update
# ci/docker/python-wheel-windows-vs2019.dockerfile.
# ci/docker/python-*-windows-*.dockerfile.
# This is a workaround for our CI problem that "archery docker build" doesn't
# use pulled built images in dev/tasks/python-wheels/github.windows.yml.
PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-08-06
PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-01-08
PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-01-08

# Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan".
# See https://github.com/conan-io/conan-docker-tools#readme and
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
files: >-
(
?^ci/docker/conda-python-emscripten\.dockerfile$|
?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$|
?^ci/docker/python-.*-wheel-windows-test-vs2019.*\.dockerfile$|
)
types: []
- repo: https://github.com/pycqa/flake8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
# under the License.

ARG arch=amd64
FROM ${arch}/alpine:3.16
FROM ${arch}/alpine:3.18

RUN apk add \
apache-orc-dev \
bash \
benchmark-dev \
boost-dev \
Expand All @@ -39,8 +40,8 @@ RUN apk add \
grpc-dev \
gtest-dev \
libxml2-dev \
llvm13-dev \
llvm13-static \
llvm16-dev \
llvm16-static \
lz4-dev \
make \
musl-locales \
Expand All @@ -61,6 +62,7 @@ RUN apk add \
thrift-dev \
tzdata \
utf8proc-dev \
xsimd-dev \
zlib-dev \
zstd-dev && \
rm -rf /var/cache/apk/* && \
Expand Down Expand Up @@ -98,6 +100,5 @@ ENV ARROW_ACERO=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PATH=/usr/lib/ccache/:$PATH \
xsimd_SOURCE=BUNDLED
MUSL_LOCPATH=/usr/share/i18n/locales/musl \
PATH=/usr/lib/ccache/bin:$PATH
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env
# when you update this file.

# The parent image is chosen by the caller through the "base" build argument.
# No default is declared here, so a value has to be supplied at build time.
ARG base
# https://github.com/hadolint/hadolint/wiki/DL3006
# (Hadolint does not expand variables and thinks '${base}' is an untagged image)
# hadolint ignore=DL3006
FROM ${base}

# Python minor version; later instructions append "t" to it to address the
# free-threaded interpreter.
ARG python=3.13

# Download and run the python.org installer with the free-threaded
# interpreter enabled (Include_freethreaded=1).
# - The patch release is spelled once in $version; keep it in line with the
#   "python" build argument above.
# - Start-Process does not raise on a non-zero exit code, so the code is
#   checked explicitly (3010 means "success, reboot required").
# - The installer is deleted in the same layer so it does not stay in the image.
SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
RUN $version = '3.13.1'; \
    $filename = 'python-' + $version + '-amd64.exe'; \
    $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \
    Invoke-WebRequest -Uri $url -OutFile $filename; \
    $installer = Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait -PassThru; \
    if (($installer.ExitCode -ne 0) -and ($installer.ExitCode -ne 3010)) { throw ('Python installer failed with exit code ' + $installer.ExitCode) }; \
    Remove-Item -Force $filename

# Command used by the following steps to start the free-threaded interpreter
# through the "py" launcher (the "python" build argument plus a "t" suffix).
ENV PYTHON_CMD="py -${python}t"

# Switch back from PowerShell to cmd so %VAR% expansion works in RUN.
SHELL ["cmd", "/S", "/C"]
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN %PYTHON_CMD% -m pip install --no-cache-dir -U pip setuptools

COPY python/requirements-wheel-test.txt C:/arrow/python/
# Cython and Pandas wheels for 3.13 free-threaded are not released yet, so
# pre-releases from the scientific-python nightly index are allowed (--pre)
# and binary wheels are preferred over source builds (--prefer-binary).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN %PYTHON_CMD% -m pip install \
    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
    --no-cache-dir \
    --pre \
    --prefer-binary \
    -r C:/arrow/python/requirements-wheel-test.txt
# cffi-based tests would crash when importing cffi.
# hadolint ignore=DL3059
RUN %PYTHON_CMD% -m pip uninstall -y cffi

# Runtime environment of the image:
# - PYTHON is the "python" build argument with a "t" suffix appended.
# - PYTHON_GIL=0 is the CPython setting that runs with the GIL disabled.
ENV PYTHON="${python}t" \
    PYTHON_GIL=0
49 changes: 49 additions & 0 deletions ci/docker/python-free-threaded-wheel-windows-vs2019.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_IMAGE_REVISION in .env
# when you update this file.

# The parent image is chosen by the caller through the "base" build argument.
# No default is declared here, so a value has to be supplied at build time.
ARG base
# https://github.com/hadolint/hadolint/wiki/DL3006
# (Hadolint does not expand variables and thinks '${base}' is an untagged image)
# hadolint ignore=DL3006
FROM ${base}

# Python minor version; later instructions append "t" to it to address the
# free-threaded interpreter.
ARG python=3.13

# Download and run the python.org installer with the free-threaded
# interpreter enabled (Include_freethreaded=1).
# - The patch release is spelled once in $version; keep it in line with the
#   "python" build argument above.
# - Start-Process does not raise on a non-zero exit code, so the code is
#   checked explicitly (3010 means "success, reboot required").
# - The installer is deleted in the same layer so it does not stay in the image.
SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
RUN $version = '3.13.1'; \
    $filename = 'python-' + $version + '-amd64.exe'; \
    $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \
    Invoke-WebRequest -Uri $url -OutFile $filename; \
    $installer = Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait -PassThru; \
    if (($installer.ExitCode -ne 0) -and ($installer.ExitCode -ne 3010)) { throw ('Python installer failed with exit code ' + $installer.ExitCode) }; \
    Remove-Item -Force $filename

# Command used by the following steps to start the free-threaded interpreter
# through the "py" launcher (the "python" build argument plus a "t" suffix).
ENV PYTHON_CMD="py -${python}t"

# Switch back from PowerShell to cmd so %VAR% expansion works in RUN.
SHELL ["cmd", "/S", "/C"]
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN %PYTHON_CMD% -m pip install --no-cache-dir -U pip setuptools

COPY python/requirements-wheel-build.txt C:/arrow/python/
# Cython wheels for 3.13 free-threaded are not released yet, so a pre-release
# is taken from the scientific-python nightly index first.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN %PYTHON_CMD% -m pip install \
    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
    --no-cache-dir \
    --pre \
    --prefer-binary \
    cython
# The remaining build requirements are resolved against the default index only.
# NOTE(review): presumably the nightly Cython installed above is meant to
# satisfy any Cython entry in the requirements file - confirm.
RUN %PYTHON_CMD% -m pip install --no-cache-dir -r C:/arrow/python/requirements-wheel-build.txt

# PYTHON is the "python" build argument with a "t" suffix appended
# (with the default argument value this is "3.13t").
ENV PYTHON="${python}t"
51 changes: 51 additions & 0 deletions ci/docker/python-wheel-windows-test-vs2019-base.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env
# when you update this file.

# based on mcr.microsoft.com/windows/servercore:ltsc2019
# contains choco and vs2019 preinstalled
FROM abrarov/msvc-2019:2.11.0

# hadolint shell=cmd.exe

# Add unix tools to path
# (setx stores the new value persistently; it takes effect in the RUN
# instructions that follow, not in the one that executes setx.)
RUN setx path "%path%;C:\Program Files\Git\usr\bin"

# 1. Remove previous installations of Python from the base image
# NOTE: a more recent base image (tried with 2.12.1) comes with Python 3.9.7
# and the MSI installers fail to remove the pip and tcl/tk "products", which
# makes the subsequent choco installation of Python 3.9.* fail because a
# Python version already exists.
# 2. Install Minio for S3 testing.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as Minio.exe; --location follows redirects.
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
    rm -rf Python* && \
    curl --fail --location \
      https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z \
      --output "C:\Windows\Minio.exe"

# Install archiver to extract xz archives (for timezone database).
# Install the GCS testbench using a well-known Python version.
# NOTE: cannot use pipx's `--fetch-missing-python` because of
# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves.
RUN choco install --no-progress -r -y archiver && \
choco install -r -y --pre --no-progress python --version=3.11.9
# pipx settings: the directory for installed executables and the interpreter
# to use (the Python 3.11 installed by choco above).
# NOTE(review): presumably read by install_gcs_testbench.bat - confirm.
ENV PIPX_BIN_DIR=C:\\Windows\\
ENV PIPX_PYTHON="C:\Python311\python.exe"
COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/
# Run the install script, then invoke the installed command once so the build
# fails here if storage-testbench cannot be started.
RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \
storage-testbench -h
56 changes: 19 additions & 37 deletions ci/docker/python-wheel-windows-test-vs2019.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,31 @@
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_IMAGE_REVISION in .env
# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env
# when you update this file.

# based on mcr.microsoft.com/windows/servercore:ltsc2019
# contains choco and vs2019 preinstalled
FROM abrarov/msvc-2019:2.11.0
ARG base
# https://github.com/hadolint/hadolint/wiki/DL3006
# (Hadolint does not expand variables and thinks '${base}' is an untagged image)
# hadolint ignore=DL3006
FROM ${base}

# hadolint shell=cmd.exe

# Add unix tools to path
RUN setx path "%path%;C:\Program Files\Git\usr\bin"

# 1. Remove previous installations of python from the base image
# NOTE: a more recent base image (tried with 2.12.1) comes with python 3.9.7
# and the msi installers are failing to remove pip and tcl/tk "products" making
# the subsequent choco python installation step failing for installing python
# version 3.9.* due to existing python version
# 2. Install Minio for S3 testing.
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python* && \
curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2024-09-13T20-26-02Z \
--output "C:\Windows\Minio.exe"

# Install the GCS testbench using a well-known Python version.
# NOTE: cannot use pipx's `--fetch-missing-python` because of
# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves.
RUN choco install -r -y --pre --no-progress python --version=3.11.9
ENV PIPX_BIN_DIR=C:\\Windows\\
ENV PIPX_PYTHON="C:\Python311\python.exe"
COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/
RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \
storage-testbench -h

# Define the full version number otherwise choco falls back to patch number 0 (3.9 => 3.9.0)
ARG python=3.9
RUN (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.5") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1")

# Install archiver to extract xz archives
RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \
choco install --no-progress -r -y archiver
# Map the requested minor version ("python" build argument) to the exact
# release to install (PYTHON_VERSION) and to the command that launches it
# (PYTHON_CMD). Both are stored with setx, so they are read by the RUN
# instructions that follow. A value outside 3.9-3.13 matches no branch and
# sets neither variable.
RUN (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PYTHON_CMD "C:\Python39\python") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PYTHON_CMD "py -3.10") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PYTHON_CMD "py -3.11") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.8" && setx PYTHON_CMD "py -3.12") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.1" && setx PYTHON_CMD "py -3.13")

# Install the selected Python release. This is a separate RUN because
# PYTHON_VERSION was stored with setx by an earlier instruction.
# hadolint ignore=DL3059
RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION%
# --no-cache-dir keeps pip's download cache out of the image layer.
# hadolint ignore=DL3059
RUN %PYTHON_CMD% -m pip install --no-cache-dir -U pip setuptools

COPY python/requirements-wheel-test.txt C:/arrow/python/
RUN %PYTHON_CMD% -m pip install --no-cache-dir -r C:/arrow/python/requirements-wheel-test.txt

# PYTHON carries the "python" build argument value into the image environment.
ENV PYTHON=$python
79 changes: 79 additions & 0 deletions ci/docker/python-wheel-windows-vs2019-base.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_IMAGE_REVISION in .env
# when you update this file.

# based on mcr.microsoft.com/windows/servercore:ltsc2019
# contains choco and vs2019 preinstalled
FROM abrarov/msvc-2019:2.11.0

# Install CMake and Ninja
# (the CMake release is selected by the "cmake" build argument; gzip, wget and
# ninja are installed without a version constraint.)
ARG cmake=3.31.2
RUN choco install --no-progress -r -y cmake --version=%cmake% --installargs 'ADD_CMAKE_TO_PATH=System' && \
choco install --no-progress -r -y gzip wget ninja

# Add unix tools to path
# (setx stores the new value persistently; it takes effect in the RUN
# instructions that follow, not in the one that executes setx.)
RUN setx path "%path%;C:\Program Files\Git\usr\bin"

# Install vcpkg
#
# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has
# started to ship precompiled binaries for the vcpkg-tool.
#
# The "vcpkg" build argument has no default; its value is passed to
# install_vcpkg.sh as the second argument, after the install location.
ARG vcpkg
# Patches and Windows-specific cmake files (e.g. triplets) for vcpkg.
COPY ci/vcpkg/*.patch \
ci/vcpkg/*windows*.cmake \
arrow/ci/vcpkg/
COPY ci/scripts/install_vcpkg.sh arrow/ci/scripts/
ENV VCPKG_ROOT=C:\\vcpkg
# Run the install script with bash, then append VCPKG_ROOT to the stored PATH.
RUN bash arrow/ci/scripts/install_vcpkg.sh /c/vcpkg %vcpkg% && \
setx PATH "%PATH%;%VCPKG_ROOT%"

# Configure vcpkg and install dependencies
# NOTE: use windows batch environment notation for build arguments in RUN
# statements but bash notation in ENV statements
# VCPKG_FORCE_SYSTEM_BINARIES=1 saves around 750MB of image size if the
# system cmake's and ninja's versions are recent enough
# NOTE(review): VCPKG_FORCE_SYSTEM_BINARIES is not set by the ENV instruction
# below - confirm whether the remark above is still current.
ARG build_type=release
ENV CMAKE_BUILD_TYPE=${build_type} \
VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \
VCPKG_DEFAULT_TRIPLET=amd64-windows-static-md-${build_type} \
VCPKG_FEATURE_FLAGS="manifests"
COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/
# cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains
# ssl related fixes as well as we can patch the vcpkg portfile to support
# arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186
# but we cannot patch those portfiles since vcpkg-tool handles the checkout of
# previous versions => use bundled S3 build
# NOTE(review): the command below does pass --x-feature=s3, which contradicts
# the remark above - confirm which of the two is intended.
RUN vcpkg install \
--clean-after-build \
--x-install-root=%VCPKG_ROOT%\installed \
--x-manifest-root=arrow/ci/vcpkg \
--x-feature=flight \
--x-feature=gcs \
--x-feature=json \
--x-feature=orc \
--x-feature=parquet \
--x-feature=s3

# Remove previous installations of Python from the base image
# NOTE: a more recent base image (tried with 2.12.1) comes with Python 3.9.7
# and the MSI installers fail to remove the pip and tcl/tk "products", which
# makes the subsequent choco installation of Python 3.9.* fail because a
# Python version already exists.
# The rm step then deletes whatever matches Python* in the working directory.
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python*
Loading

0 comments on commit a59a2a2

Please sign in to comment.