From c9209aaa01b88fde46975d8f6d5d05f79219880b Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 1 Nov 2024 08:14:19 +0100 Subject: [PATCH] Python bindings to `cuFileDriverOpen()` and `cuFileDriverClose()` (#514) Changes: - Adding Python bindings to `cuFileDriverOpen()` and `cuFileDriverClose()`. - We now [only open the cufile driver explicitly](https://github.com/rapidsai/kvikio/pull/160) in CUDA versions older than v12.2. - Introducing `kvikio.cufile_driver.initialize()`, which opens the cuFile driver and closes it again at module exit. - Let CI fail if KvikIO wasn't built with cuFile support. * Except on cuda11.8+arm64; cuFile didn't support arm until cuda v12.4. - Some refactoring and cleanup! Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/kvikio/pull/514 --- ci/run_pytests.sh | 7 +- ci/test_wheel.sh | 7 +- cpp/examples/basic_io.cpp | 2 +- cpp/examples/basic_no_cuda.cpp | 2 +- .../downstream/downstream_example.cpp | 18 ++++- .../{cufile_config.hpp => cufile/config.hpp} | 0 cpp/include/kvikio/{ => cufile}/driver.hpp | 6 +- cpp/include/kvikio/file_handle.hpp | 5 +- cpp/include/kvikio/shim/cufile.hpp | 65 ++++++++++++++----- cpp/include/kvikio/stream.hpp | 2 +- python/kvikio/kvikio/__init__.py | 5 -- python/kvikio/kvikio/_lib/CMakeLists.txt | 2 +- ...river_properties.pyx => cufile_driver.pyx} | 15 ++++- python/kvikio/kvikio/benchmarks/utils.py | 3 +- python/kvikio/kvikio/cufile_driver.py | 62 ++++++++++++++++++ python/kvikio/pyproject.toml | 3 + python/kvikio/tests/test_cufile_driver.py | 12 ++++ 17 files changed, 178 insertions(+), 38 deletions(-) rename cpp/include/kvikio/{cufile_config.hpp => cufile/config.hpp} (100%) rename cpp/include/kvikio/{ => cufile}/driver.hpp (97%) rename python/kvikio/kvikio/_lib/{driver_properties.pyx => cufile_driver.pyx} (88%) create mode 100644 
python/kvikio/kvikio/cufile_driver.py create mode 100644 python/kvikio/tests/test_cufile_driver.py diff --git a/ci/run_pytests.sh b/ci/run_pytests.sh index b2c93dbe56..1a7edb5be5 100755 --- a/ci/run_pytests.sh +++ b/ci/run_pytests.sh @@ -6,4 +6,9 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/kvikio -pytest --cache-clear --verbose "$@" tests +# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since +# cuFile didn't support arm until 12.4 +[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \ + && PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=() + +pytest --cache-clear --verbose "${PYTEST_MARK[@]}" "$@" tests diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 94a31b04b6..a3f014ca3f 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -9,4 +9,9 @@ RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-fr python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" -python -m pytest ./python/kvikio/tests +# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since +# cuFile didn't support arm until 12.4 +[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \ + && PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=() + +python -m pytest --cache-clear --verbose "${PYTEST_MARK[@]}" ./python/kvikio/tests diff --git a/cpp/examples/basic_io.cpp b/cpp/examples/basic_io.cpp index 3a4ab892ad..1eabd8fdee 100644 --- a/cpp/examples/basic_io.cpp +++ b/cpp/examples/basic_io.cpp @@ -21,8 +21,8 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/examples/basic_no_cuda.cpp b/cpp/examples/basic_no_cuda.cpp index 700e3e8be9..0d79a52883 100644 --- a/cpp/examples/basic_no_cuda.cpp +++ b/cpp/examples/basic_no_cuda.cpp @@ -19,8 +19,8 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/examples/downstream/downstream_example.cpp 
b/cpp/examples/downstream/downstream_example.cpp index 269d50e9e1..87603908a1 100644 --- a/cpp/examples/downstream/downstream_example.cpp +++ b/cpp/examples/downstream/downstream_example.cpp @@ -1,7 +1,23 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include +#include #include -#include using namespace std; diff --git a/cpp/include/kvikio/cufile_config.hpp b/cpp/include/kvikio/cufile/config.hpp similarity index 100% rename from cpp/include/kvikio/cufile_config.hpp rename to cpp/include/kvikio/cufile/config.hpp diff --git a/cpp/include/kvikio/driver.hpp b/cpp/include/kvikio/cufile/driver.hpp similarity index 97% rename from cpp/include/kvikio/driver.hpp rename to cpp/include/kvikio/cufile/driver.hpp index 7d73f465aa..b609029a69 100644 --- a/cpp/include/kvikio/driver.hpp +++ b/cpp/include/kvikio/cufile/driver.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,7 +45,7 @@ inline void set_driver_flag(unsigned int& prop, unsigned int flag, bool val) noe class DriverInitializer { // Optional, if not used cuFiles opens the driver automatically public: - DriverInitializer() { CUFILE_TRY(cuFileAPI::instance().DriverOpen()); } + DriverInitializer() { cuFileAPI::instance().driver_open(); } DriverInitializer(DriverInitializer const&) = delete; DriverInitializer& operator=(DriverInitializer const&) = delete; @@ -55,7 +55,7 @@ class DriverInitializer { ~DriverInitializer() { try { - CUFILE_TRY(cuFileAPI::instance().DriverClose()); + cuFileAPI::instance().driver_close(); } catch (const CUfileException& e) { std::cerr << "Unable to close GDS file driver: "; std::cerr << e.what(); diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 7c3e1c92a3..97c0ba9748 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -22,15 +22,12 @@ #include #include -#include -#include -#include #include #include #include #include -#include +#include #include #include #include diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 861c869f35..c5c7a0671f 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -16,8 +16,8 @@ #pragma once #include +#include -#include #include #include @@ -38,8 +38,6 @@ class cuFileAPI { decltype(cuFileWrite)* Write{nullptr}; decltype(cuFileBufRegister)* BufRegister{nullptr}; decltype(cuFileBufDeregister)* BufDeregister{nullptr}; - decltype(cuFileDriverOpen)* DriverOpen{nullptr}; - decltype(cuFileDriverClose)* DriverClose{nullptr}; decltype(cuFileDriverGetProperties)* DriverGetProperties{nullptr}; decltype(cuFileDriverSetPollMode)* DriverSetPollMode{nullptr}; decltype(cuFileDriverSetMaxCacheSize)* DriverSetMaxCacheSize{nullptr}; @@ -54,6 +52,12 @@ class cuFileAPI { decltype(cuFileStreamRegister)* StreamRegister{nullptr}; decltype(cuFileStreamDeregister)* 
StreamDeregister{nullptr}; + private: + // Don't call driver open and close directly, use `.driver_open()` and `.driver_close()`. + decltype(cuFileDriverOpen)* DriverOpen{nullptr}; + decltype(cuFileDriverClose)* DriverClose{nullptr}; + + public: bool stream_available = false; private: @@ -105,25 +109,25 @@ class cuFileAPI { } #endif - // cuFile is supposed to open and close the driver automatically but because of a bug in - // CUDA 11.8, it sometimes segfault. See . - CUfileError_t const error = DriverOpen(); - if (error.err != CU_FILE_SUCCESS) { - throw std::runtime_error(std::string{"cuFile error at: "} + __FILE__ + ":" + - KVIKIO_STRINGIFY(__LINE__) + ": " + - cufileop_status_error(error.err)); - } + // cuFile is supposed to open and close the driver automatically but + // because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults: + // . + // We use the stream API as a version indicator of cuFile since it was introduced + // in cuFile v1.7 (CUDA v12.2). + if (!stream_available) { driver_open(); } } + + // Notice, we have to close the driver at program exit (if we opened it) even though we are + // not allowed to call CUDA after main[1]. This is because, cuFile will segfault if the + // driver isn't closed on program exit i.e. we are doomed if we do, doomed if we don't, but + // this seems to be the lesser of two evils. 
+ // [1] ~cuFileAPI() { - CUfileError_t const error = DriverClose(); - if (error.err != CU_FILE_SUCCESS) { - std::cerr << "Unable to close GDS file driver: " << cufileop_status_error(error.err) - << std::endl; - } + if (!stream_available) { driver_close(); } } #else - cuFileAPI() { throw std::runtime_error(CUFILE_ERRSTR(0)); } + cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); } #endif public: @@ -137,6 +141,33 @@ class cuFileAPI { static cuFileAPI _instance; return _instance; } + + /** + * @brief Open the cuFile driver + * + * cuFile allows multiple calls to `cufileDriverOpen()`, only the first call opens + * the driver, but every call should have a matching call to `cufileDriverClose()`. + */ + void driver_open() + { + CUfileError_t const error = DriverOpen(); + if (error.err != CU_FILE_SUCCESS) { + throw std::runtime_error(std::string{"Unable to open GDS file driver: "} + + cufileop_status_error(error.err)); + } + } + + /** + * @brief Close the cuFile driver + */ + void driver_close() + { + CUfileError_t const error = DriverClose(); + if (error.err != CU_FILE_SUCCESS) { + throw std::runtime_error(std::string{"Unable to close GDS file driver: "} + + cufileop_status_error(error.err)); + } + } }; /** diff --git a/cpp/include/kvikio/stream.hpp b/cpp/include/kvikio/stream.hpp index 2e206b4c5e..9eb9942b7a 100644 --- a/cpp/include/kvikio/stream.hpp +++ b/cpp/include/kvikio/stream.hpp @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include #include diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py index 749d87ec1f..a2bfffaf48 100644 --- a/python/kvikio/kvikio/__init__.py +++ b/python/kvikio/kvikio/__init__.py @@ -1,15 +1,10 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. 
-from kvikio._lib import driver_properties # type: ignore from kvikio._version import __git_commit__, __version__ from kvikio.cufile import CuFile from kvikio.remote_file import RemoteFile, is_remote_file_available -# TODO: Wrap nicely, maybe as a dataclass? -DriverProperties = driver_properties.DriverProperties - - __all__ = [ "__git_commit__", "__version__", diff --git a/python/kvikio/kvikio/_lib/CMakeLists.txt b/python/kvikio/kvikio/_lib/CMakeLists.txt index 18bb46c0fb..364699f7bd 100644 --- a/python/kvikio/kvikio/_lib/CMakeLists.txt +++ b/python/kvikio/kvikio/_lib/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # Set the list of Cython files to build, one .so per file -set(cython_modules arr.pyx buffer.pyx defaults.pyx driver_properties.pyx file_handle.pyx future.pyx +set(cython_modules arr.pyx buffer.pyx defaults.pyx cufile_driver.pyx file_handle.pyx future.pyx libnvcomp.pyx libnvcomp_ll.pyx ) diff --git a/python/kvikio/kvikio/_lib/driver_properties.pyx b/python/kvikio/kvikio/_lib/cufile_driver.pyx similarity index 88% rename from python/kvikio/kvikio/_lib/driver_properties.pyx rename to python/kvikio/kvikio/_lib/cufile_driver.pyx index 674ef14cde..29302a0104 100644 --- a/python/kvikio/kvikio/_lib/driver_properties.pyx +++ b/python/kvikio/kvikio/_lib/cufile_driver.pyx @@ -8,7 +8,20 @@ from libcpp cimport bool -cdef extern from "" nogil: +cdef extern from "" nogil: + cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except + + cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except + + + +def driver_open(): + cpp_driver_open() + + +def driver_close(): + cpp_driver_close() + + +cdef extern from "" nogil: cdef cppclass cpp_DriverProperties "kvikio::DriverProperties": cpp_DriverProperties() except + bool is_gds_available() except + diff --git a/python/kvikio/kvikio/benchmarks/utils.py b/python/kvikio/kvikio/benchmarks/utils.py index 
69375b8c21..23c7731f24 100644 --- a/python/kvikio/kvikio/benchmarks/utils.py +++ b/python/kvikio/kvikio/benchmarks/utils.py @@ -12,6 +12,7 @@ from dask.utils import format_bytes import kvikio +import kvikio.cufile_driver import kvikio.defaults @@ -26,7 +27,7 @@ def drop_vm_cache() -> None: def pprint_sys_info() -> None: """Pretty print system information""" - props = kvikio.DriverProperties() + props = kvikio.cufile_driver.DriverProperties() try: import pynvml diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py new file mode 100644 index 0000000000..e78242a514 --- /dev/null +++ b/python/kvikio/kvikio/cufile_driver.py @@ -0,0 +1,62 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# See file LICENSE for terms. + +import atexit + +from kvikio._lib import cufile_driver # type: ignore + +# TODO: Wrap nicely, maybe as a dataclass? +# +DriverProperties = cufile_driver.DriverProperties + + +def driver_open() -> None: + """Open the cuFile driver + + cuFile accepts multiple calls to `driver_open()`. Only the first call + opens the driver, but every call must have a matching call to + `driver_close()`. + + Normally, it is not required to open and close the cuFile driver since + it is done automatically. + + Raises + ------ + RuntimeError + If cuFile isn't available. + """ + return cufile_driver.driver_open() + + +def driver_close() -> None: + """Close the cuFile driver + + cuFile accepts multiple calls to `driver_open()`. Only the first call + opens the driver, but every call must have a matching call to + `driver_close()`. + + Raises + ------ + RuntimeError + If cuFile isn't available. + """ + return cufile_driver.driver_close() + + +def initialize() -> None: + """Open the cuFile driver and close it again at module exit + + Normally, it is not required to open and close the cuFile driver since + it is done automatically. + + Notes + ----- + Registers an atexit handler that calls :func:`driver_close`. 
+ + Raises + ------ + RuntimeError + If cuFile isn't available. + """ + driver_open() + atexit.register(driver_close) diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index b30437cec0..cb9491e75e 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -144,3 +144,6 @@ filterwarnings = [ "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache", "ignore::DeprecationWarning:botocore.*", ] +markers = [ + "cufile: tests to skip if cuFile isn't available e.g. run with `pytest -m 'not cufile'`" +] diff --git a/python/kvikio/tests/test_cufile_driver.py b/python/kvikio/tests/test_cufile_driver.py new file mode 100644 index 0000000000..0a64bf0952 --- /dev/null +++ b/python/kvikio/tests/test_cufile_driver.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# See file LICENSE for terms. + +import pytest + +import kvikio.cufile_driver + + +@pytest.mark.cufile +def test_open_and_close(): + kvikio.cufile_driver.driver_open() + kvikio.cufile_driver.driver_close()