From 6cfefb802c099caf20d4babc24f40731c7a8b35f Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Thu, 24 Oct 2024 11:23:29 +0200 Subject: [PATCH 01/14] move to subfolder: cufile --- cpp/examples/basic_io.cpp | 2 +- cpp/examples/basic_no_cuda.cpp | 2 +- cpp/examples/downstream/downstream_example.cpp | 18 +++++++++++++++++- .../{cufile_config.hpp => cufile/config.hpp} | 0 cpp/include/kvikio/{ => cufile}/driver.hpp | 0 cpp/include/kvikio/file_handle.hpp | 5 +---- .../kvikio/kvikio/_lib/driver_properties.pyx | 2 +- 7 files changed, 21 insertions(+), 8 deletions(-) rename cpp/include/kvikio/{cufile_config.hpp => cufile/config.hpp} (100%) rename cpp/include/kvikio/{ => cufile}/driver.hpp (100%) diff --git a/cpp/examples/basic_io.cpp b/cpp/examples/basic_io.cpp index 3a4ab892ad..1eabd8fdee 100644 --- a/cpp/examples/basic_io.cpp +++ b/cpp/examples/basic_io.cpp @@ -21,8 +21,8 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/examples/basic_no_cuda.cpp b/cpp/examples/basic_no_cuda.cpp index 700e3e8be9..0d79a52883 100644 --- a/cpp/examples/basic_no_cuda.cpp +++ b/cpp/examples/basic_no_cuda.cpp @@ -19,8 +19,8 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/examples/downstream/downstream_example.cpp b/cpp/examples/downstream/downstream_example.cpp index 269d50e9e1..87603908a1 100644 --- a/cpp/examples/downstream/downstream_example.cpp +++ b/cpp/examples/downstream/downstream_example.cpp @@ -1,7 +1,23 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include +#include #include -#include using namespace std; diff --git a/cpp/include/kvikio/cufile_config.hpp b/cpp/include/kvikio/cufile/config.hpp similarity index 100% rename from cpp/include/kvikio/cufile_config.hpp rename to cpp/include/kvikio/cufile/config.hpp diff --git a/cpp/include/kvikio/driver.hpp b/cpp/include/kvikio/cufile/driver.hpp similarity index 100% rename from cpp/include/kvikio/driver.hpp rename to cpp/include/kvikio/cufile/driver.hpp diff --git a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 19445f1333..69b5087f0c 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -22,15 +22,12 @@ #include #include -#include -#include -#include #include #include #include #include -#include +#include #include #include #include diff --git a/python/kvikio/kvikio/_lib/driver_properties.pyx b/python/kvikio/kvikio/_lib/driver_properties.pyx index 674ef14cde..3028bfcbc6 100644 --- a/python/kvikio/kvikio/_lib/driver_properties.pyx +++ b/python/kvikio/kvikio/_lib/driver_properties.pyx @@ -8,7 +8,7 @@ from libcpp cimport bool -cdef extern from "" nogil: +cdef extern from "" nogil: cdef cppclass cpp_DriverProperties "kvikio::DriverProperties": cpp_DriverProperties() except + bool is_gds_available() except + From 2e4f50e6a38222aeddb06d7ba1d0eaefd256d9f7 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Fri, 25 Oct 2024 12:21:11 +0200 Subject: [PATCH 02/14] cuFileAPI: impl. 
driver_open() and driver_close() --- cpp/include/kvikio/cufile/driver.hpp | 6 +-- cpp/include/kvikio/shim/cufile.hpp | 65 +++++++++++++++++++--------- cpp/include/kvikio/stream.hpp | 2 +- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/cpp/include/kvikio/cufile/driver.hpp b/cpp/include/kvikio/cufile/driver.hpp index 7d73f465aa..b609029a69 100644 --- a/cpp/include/kvikio/cufile/driver.hpp +++ b/cpp/include/kvikio/cufile/driver.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ inline void set_driver_flag(unsigned int& prop, unsigned int flag, bool val) noe class DriverInitializer { // Optional, if not used cuFiles opens the driver automatically public: - DriverInitializer() { CUFILE_TRY(cuFileAPI::instance().DriverOpen()); } + DriverInitializer() { cuFileAPI::instance().driver_open(); } DriverInitializer(DriverInitializer const&) = delete; DriverInitializer& operator=(DriverInitializer const&) = delete; @@ -55,7 +55,7 @@ class DriverInitializer { ~DriverInitializer() { try { - CUFILE_TRY(cuFileAPI::instance().DriverClose()); + cuFileAPI::instance().driver_close(); } catch (const CUfileException& e) { std::cerr << "Unable to close GDS file driver: "; std::cerr << e.what(); diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 861c869f35..bc4d855117 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -16,8 +16,8 @@ #pragma once #include +#include -#include #include #include @@ -38,8 +38,6 @@ class cuFileAPI { decltype(cuFileWrite)* Write{nullptr}; decltype(cuFileBufRegister)* BufRegister{nullptr}; decltype(cuFileBufDeregister)* BufDeregister{nullptr}; - decltype(cuFileDriverOpen)* DriverOpen{nullptr}; - decltype(cuFileDriverClose)* DriverClose{nullptr}; 
decltype(cuFileDriverGetProperties)* DriverGetProperties{nullptr}; decltype(cuFileDriverSetPollMode)* DriverSetPollMode{nullptr}; decltype(cuFileDriverSetMaxCacheSize)* DriverSetMaxCacheSize{nullptr}; @@ -54,6 +52,12 @@ class cuFileAPI { decltype(cuFileStreamRegister)* StreamRegister{nullptr}; decltype(cuFileStreamDeregister)* StreamDeregister{nullptr}; + private: + // Don't call driver open and close directly, use `.driver_open()` and `.driver_close()`. + decltype(cuFileDriverOpen)* _DriverOpen{nullptr}; + decltype(cuFileDriverClose)* _DriverClose{nullptr}; + + public: bool stream_available = false; private: @@ -77,8 +81,8 @@ class cuFileAPI { get_symbol(Write, lib, KVIKIO_STRINGIFY(cuFileWrite)); get_symbol(BufRegister, lib, KVIKIO_STRINGIFY(cuFileBufRegister)); get_symbol(BufDeregister, lib, KVIKIO_STRINGIFY(cuFileBufDeregister)); - get_symbol(DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen)); - get_symbol(DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose)); + get_symbol(_DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen)); + get_symbol(_DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose)); get_symbol(DriverGetProperties, lib, KVIKIO_STRINGIFY(cuFileDriverGetProperties)); get_symbol(DriverSetPollMode, lib, KVIKIO_STRINGIFY(cuFileDriverSetPollMode)); get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize)); @@ -107,23 +111,17 @@ class cuFileAPI { // cuFile is supposed to open and close the driver automatically but because of a bug in // CUDA 11.8, it sometimes segfault. See . 
- CUfileError_t const error = DriverOpen(); - if (error.err != CU_FILE_SUCCESS) { - throw std::runtime_error(std::string{"cuFile error at: "} + __FILE__ + ":" + - KVIKIO_STRINGIFY(__LINE__) + ": " + - cufileop_status_error(error.err)); - } - } - ~cuFileAPI() - { - CUfileError_t const error = DriverClose(); - if (error.err != CU_FILE_SUCCESS) { - std::cerr << "Unable to close GDS file driver: " << cufileop_status_error(error.err) - << std::endl; - } + driver_open(); } + + // Notice, we have to close the driver at program exit even though we are not allowed to + // call CUDA after main[1]. This is because, cuFile will segfault if the driver isn't + // closed on program exit i.e. we are doomed if we do, doomed if we don't, but this seems + // to be the lesser of two evils. + // [1] + ~cuFileAPI() { driver_close(); } #else - cuFileAPI() { throw std::runtime_error(CUFILE_ERRSTR(0)); } + cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); } #endif public: @@ -137,6 +135,33 @@ class cuFileAPI { static cuFileAPI _instance; return _instance; } + + /** + * @brief Open the cuFile driver + * + * cuFile accept multiple calls to `cufileDriverOpen()`, only the first call opens + * the driver, but every call should have a matching call to `cufileDriverClose()`. 
+ */ + void driver_open() + { + CUfileError_t const error = _DriverOpen(); + if (error.err != CU_FILE_SUCCESS) { + throw std::runtime_error(std::string{"Unable to open GDS file driver: "} + + cufileop_status_error(error.err)); + } + } + + /** + * @brief Close the cuFile driver + */ + void driver_close() + { + CUfileError_t const error = _DriverClose(); + if (error.err != CU_FILE_SUCCESS) { + throw std::runtime_error(std::string{"Unable to close GDS file driver: "} + + cufileop_status_error(error.err)); + } + } }; /** diff --git a/cpp/include/kvikio/stream.hpp b/cpp/include/kvikio/stream.hpp index 2e206b4c5e..9eb9942b7a 100644 --- a/cpp/include/kvikio/stream.hpp +++ b/cpp/include/kvikio/stream.hpp @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include #include From 60c1debbfd2b1557ed232462df2855b6e35e6428 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Sat, 26 Oct 2024 15:34:52 +0200 Subject: [PATCH 03/14] shim: only open the driver on older CUDA installations (<12.2) --- cpp/include/kvikio/shim/cufile.hpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index bc4d855117..e4f49527dc 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -111,15 +111,20 @@ class cuFileAPI { // cuFile is supposed to open and close the driver automatically but because of a bug in // CUDA 11.8, it sometimes segfault. See . - driver_open(); + if (!stream_available) { // The stream API was introduced in CUDA 12.2. + driver_open(); + } } - // Notice, we have to close the driver at program exit even though we are not allowed to - // call CUDA after main[1]. This is because, cuFile will segfault if the driver isn't - // closed on program exit i.e. we are doomed if we do, doomed if we don't, but this seems - // to be the lesser of two evils. 
+ // Notice, we have to close the driver at program exit (if we opened it) even though we are + // not allowed to call CUDA after main[1]. This is because, cuFile will segfault if the + // driver isn't closed on program exit i.e. we are doomed if we do, doomed if we don't, but + // this seems to be the lesser of two evils. // [1] - ~cuFileAPI() { driver_close(); } + ~cuFileAPI() + { + if (!stream_available) { driver_close(); } + } #else cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); } #endif From 4e9edc0f28dbe34e0caa90d6483f35657a446460 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Sat, 26 Oct 2024 16:32:30 +0200 Subject: [PATCH 04/14] rename: driver_properties.pyx => cufile_driver.pyx --- python/kvikio/kvikio/__init__.py | 4 ++-- python/kvikio/kvikio/_lib/CMakeLists.txt | 2 +- .../kvikio/_lib/{driver_properties.pyx => cufile_driver.pyx} | 0 3 files changed, 3 insertions(+), 3 deletions(-) rename python/kvikio/kvikio/_lib/{driver_properties.pyx => cufile_driver.pyx} (100%) diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py index 749d87ec1f..5a3bc459e5 100644 --- a/python/kvikio/kvikio/__init__.py +++ b/python/kvikio/kvikio/__init__.py @@ -1,13 +1,13 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. -from kvikio._lib import driver_properties # type: ignore +from kvikio._lib import cufile_driver # type: ignore from kvikio._version import __git_commit__, __version__ from kvikio.cufile import CuFile from kvikio.remote_file import RemoteFile, is_remote_file_available # TODO: Wrap nicely, maybe as a dataclass? 
-DriverProperties = driver_properties.DriverProperties +DriverProperties = cufile_driver.DriverProperties __all__ = [ diff --git a/python/kvikio/kvikio/_lib/CMakeLists.txt b/python/kvikio/kvikio/_lib/CMakeLists.txt index 18bb46c0fb..364699f7bd 100644 --- a/python/kvikio/kvikio/_lib/CMakeLists.txt +++ b/python/kvikio/kvikio/_lib/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # Set the list of Cython files to build, one .so per file -set(cython_modules arr.pyx buffer.pyx defaults.pyx driver_properties.pyx file_handle.pyx future.pyx +set(cython_modules arr.pyx buffer.pyx defaults.pyx cufile_driver.pyx file_handle.pyx future.pyx libnvcomp.pyx libnvcomp_ll.pyx ) diff --git a/python/kvikio/kvikio/_lib/driver_properties.pyx b/python/kvikio/kvikio/_lib/cufile_driver.pyx similarity index 100% rename from python/kvikio/kvikio/_lib/driver_properties.pyx rename to python/kvikio/kvikio/_lib/cufile_driver.pyx From e1637d79f6face5be56d147a169aed59913d5f03 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Sat, 26 Oct 2024 16:39:42 +0200 Subject: [PATCH 05/14] driver open/close python bindings --- python/kvikio/kvikio/_lib/cufile_driver.pyx | 13 +++++ python/kvikio/kvikio/cufile_driver.py | 57 +++++++++++++++++++++ python/kvikio/tests/test_cufile_driver.py | 19 +++++++ 3 files changed, 89 insertions(+) create mode 100644 python/kvikio/kvikio/cufile_driver.py create mode 100644 python/kvikio/tests/test_cufile_driver.py diff --git a/python/kvikio/kvikio/_lib/cufile_driver.pyx b/python/kvikio/kvikio/_lib/cufile_driver.pyx index 3028bfcbc6..29302a0104 100644 --- a/python/kvikio/kvikio/_lib/cufile_driver.pyx +++ b/python/kvikio/kvikio/_lib/cufile_driver.pyx @@ -8,6 +8,19 @@ from libcpp cimport bool +cdef extern from "" nogil: + cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except + + cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except + + + +def driver_open(): + cpp_driver_open() + + +def driver_close(): + cpp_driver_close() + + cdef extern from "" nogil: cdef cppclass cpp_DriverProperties "kvikio::DriverProperties": cpp_DriverProperties() except + diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py new file mode 100644 index 0000000000..8a0e625a55 --- /dev/null +++ b/python/kvikio/kvikio/cufile_driver.py @@ -0,0 +1,57 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# See file LICENSE for terms. + +import atexit + +from kvikio._lib import cufile_driver # type: ignore + +# TODO: Wrap nicely, maybe as a dataclass? +DriverProperties = cufile_driver.DriverProperties + + +def driver_open() -> None: + """Open the cuFile driver + + cuFile accept multiple calls to `driver_open()`, only the first call + opens the driver, but every call must have a matching call to + `driver_close()`. + + Normally, it is not required to open and close the cuFile driver since + it is done automatically. + + Raises + ------ + RuntimeError + If cuFile isn't available. 
+ """ + return cufile_driver.driver_open() + + +def driver_close() -> None: + """Close the cuFile driver + + cuFile accept multiple calls to `driver_open()`, only the first call + opens the driver, but every call must have a matching call to + `driver_close()`. + + Raises + ------ + RuntimeError + If cuFile isn't available. + """ + return cufile_driver.driver_close() + + +def initialize() -> None: + """Open the cuFile driver and close it again at module exit + + Normally, it is not required to open and close the cuFile driver since + it is done automatically. + + Raises + ------ + RuntimeError + If cuFile isn't available. + """ + driver_open() + atexit.register(driver_close) diff --git a/python/kvikio/tests/test_cufile_driver.py b/python/kvikio/tests/test_cufile_driver.py new file mode 100644 index 0000000000..d50fb8652e --- /dev/null +++ b/python/kvikio/tests/test_cufile_driver.py @@ -0,0 +1,19 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# See file LICENSE for terms. + +import pytest + +import kvikio.cufile_driver +import kvikio.defaults + + +@pytest.mark.skipif( + kvikio.defaults.compat_mode(), + reason=( + "cannot test the cuFile driver when the " + "test is running in compatibility mode" + ), +) +def test_open_and_close(): + kvikio.cufile_driver.driver_open() + kvikio.cufile_driver.driver_close() From edcbe60cf33bee75bdd479149c71bc1e080e1df6 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Sun, 27 Oct 2024 08:44:06 +0100 Subject: [PATCH 06/14] CI: test wheel use --verbose --- ci/test_wheel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 94a31b04b6..301c27188a 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -9,4 +9,4 @@ RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-fr python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" -python -m pytest ./python/kvikio/tests +python -m pytest --verbose ./python/kvikio/tests From 032c326d84ad73e9e28b6371bd7b0bd8432e61fe Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Sun, 27 Oct 2024 08:44:47 +0100 Subject: [PATCH 07/14] Enable test_open_and_close always We want to trigger CI error if a package was built without cufile support --- python/kvikio/tests/test_cufile_driver.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/kvikio/tests/test_cufile_driver.py b/python/kvikio/tests/test_cufile_driver.py index d50fb8652e..b0cf267bd1 100644 --- a/python/kvikio/tests/test_cufile_driver.py +++ b/python/kvikio/tests/test_cufile_driver.py @@ -1,19 +1,10 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. -import pytest - import kvikio.cufile_driver import kvikio.defaults -@pytest.mark.skipif( - kvikio.defaults.compat_mode(), - reason=( - "cannot test the cuFile driver when the " - "test is running in compatibility mode" - ), -) def test_open_and_close(): kvikio.cufile_driver.driver_open() kvikio.cufile_driver.driver_close() From 77826eeaf30406c2d63cd31bb85299e6a350fee2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Sun, 27 Oct 2024 10:59:52 +0100 Subject: [PATCH 08/14] adding a pytest mark "cufile" for tests that require cufile --- python/kvikio/pyproject.toml | 3 +++ python/kvikio/tests/test_cufile_driver.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index 25a961a858..7922f173ce 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -144,3 +144,6 @@ filterwarnings = [ "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache", "ignore::DeprecationWarning:botocore.*", ] +markers = [ + "cufile: tests to skip if cuFile isn't available e.g. run with `pytest -m 'not cufile'`" +] diff --git a/python/kvikio/tests/test_cufile_driver.py b/python/kvikio/tests/test_cufile_driver.py index b0cf267bd1..0a64bf0952 100644 --- a/python/kvikio/tests/test_cufile_driver.py +++ b/python/kvikio/tests/test_cufile_driver.py @@ -1,10 +1,12 @@ # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. +import pytest + import kvikio.cufile_driver -import kvikio.defaults +@pytest.mark.cufile def test_open_and_close(): kvikio.cufile_driver.driver_open() kvikio.cufile_driver.driver_close() From b61ebffaf34bb8540432c40f68740d7d48284538 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Sun, 27 Oct 2024 15:23:09 +0100 Subject: [PATCH 09/14] CI: not run "cufile" marked tests on arm64+cuda11.8 --- ci/run_pytests.sh | 7 ++++++- ci/test_wheel.sh | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ci/run_pytests.sh b/ci/run_pytests.sh index b2c93dbe56..1a7edb5be5 100755 --- a/ci/run_pytests.sh +++ b/ci/run_pytests.sh @@ -6,4 +6,9 @@ set -euo pipefail # Support invoking run_pytests.sh outside the script directory cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/kvikio -pytest --cache-clear --verbose "$@" tests +# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since +# cuFile didn't support arm until 12.4 +[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \ + && PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=() + +pytest --cache-clear --verbose "${PYTEST_MARK[@]}" "$@" tests diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 301c27188a..a3f014ca3f 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -9,4 +9,9 @@ RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-fr python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" -python -m pytest --verbose ./python/kvikio/tests +# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since +# cuFile didn't support arm until 12.4 +[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \ + && PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=() + +python -m pytest --cache-clear --verbose "${PYTEST_MARK[@]}" ./python/kvikio/tests From ee71aac335adbe702101e77c3a9fa406450d6d2c Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Wed, 30 Oct 2024 11:18:15 +0100 Subject: [PATCH 10/14] rename _Driver* => Driver* --- cpp/include/kvikio/shim/cufile.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index e4f49527dc..411359e6d7 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -54,8 +54,8 @@ class cuFileAPI { private: // Don't call driver open and close directly, use `.driver_open()` and `.driver_close()`. - decltype(cuFileDriverOpen)* _DriverOpen{nullptr}; - decltype(cuFileDriverClose)* _DriverClose{nullptr}; + decltype(cuFileDriverOpen)* DriverOpen{nullptr}; + decltype(cuFileDriverClose)* DriverClose{nullptr}; public: bool stream_available = false; @@ -81,8 +81,8 @@ class cuFileAPI { get_symbol(Write, lib, KVIKIO_STRINGIFY(cuFileWrite)); get_symbol(BufRegister, lib, KVIKIO_STRINGIFY(cuFileBufRegister)); get_symbol(BufDeregister, lib, KVIKIO_STRINGIFY(cuFileBufDeregister)); - get_symbol(_DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen)); - get_symbol(_DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose)); + get_symbol(DriverOpen, lib, KVIKIO_STRINGIFY(cuFileDriverOpen)); + get_symbol(DriverClose, lib, KVIKIO_STRINGIFY(cuFileDriverClose)); get_symbol(DriverGetProperties, lib, KVIKIO_STRINGIFY(cuFileDriverGetProperties)); get_symbol(DriverSetPollMode, lib, KVIKIO_STRINGIFY(cuFileDriverSetPollMode)); get_symbol(DriverSetMaxCacheSize, lib, KVIKIO_STRINGIFY(cuFileDriverSetMaxCacheSize)); @@ -149,7 +149,7 @@ class cuFileAPI { */ void driver_open() { - CUfileError_t const error = _DriverOpen(); + CUfileError_t const error = DriverOpen(); if (error.err != CU_FILE_SUCCESS) { throw std::runtime_error(std::string{"Unable to open GDS file driver: "} + cufileop_status_error(error.err)); @@ -161,7 +161,7 @@ class cuFileAPI { */ void driver_close() { - CUfileError_t const error = _DriverClose(); + CUfileError_t const error = DriverClose(); if 
(error.err != CU_FILE_SUCCESS) { throw std::runtime_error(std::string{"Unable to close GDS file driver: "} + cufileop_status_error(error.err)); From 2fb04a6aab4e5c396001acfce3308ef73b89b9e0 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 30 Oct 2024 11:23:01 +0100 Subject: [PATCH 11/14] clean up DriverProperties --- cpp/include/kvikio/shim/cufile.hpp | 11 ++++++----- python/kvikio/kvikio/__init__.py | 5 ----- python/kvikio/kvikio/benchmarks/utils.py | 3 ++- python/kvikio/kvikio/cufile_driver.py | 1 + 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 411359e6d7..6fc99b8a82 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -109,11 +109,12 @@ class cuFileAPI { } #endif - // cuFile is supposed to open and close the driver automatically but because of a bug in - // CUDA 11.8, it sometimes segfault. See . - if (!stream_available) { // The stream API was introduced in CUDA 12.2. - driver_open(); - } + // cuFile is supposed to open and close the driver automatically but + // because of a bug in cuFile v1.4 (CUDA v11.8), it sometimes segfault: + // . + // We use the stream API as an version indicator of cuFile, it was introduced + // in cuFile v1.7 (CUDA v12.2). + if (!stream_available) { driver_open(); } } // Notice, we have to close the driver at program exit (if we opened it) even though we are diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py index 5a3bc459e5..a2bfffaf48 100644 --- a/python/kvikio/kvikio/__init__.py +++ b/python/kvikio/kvikio/__init__.py @@ -1,15 +1,10 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. 
-from kvikio._lib import cufile_driver # type: ignore from kvikio._version import __git_commit__, __version__ from kvikio.cufile import CuFile from kvikio.remote_file import RemoteFile, is_remote_file_available -# TODO: Wrap nicely, maybe as a dataclass? -DriverProperties = cufile_driver.DriverProperties - - __all__ = [ "__git_commit__", "__version__", diff --git a/python/kvikio/kvikio/benchmarks/utils.py b/python/kvikio/kvikio/benchmarks/utils.py index 69375b8c21..23c7731f24 100644 --- a/python/kvikio/kvikio/benchmarks/utils.py +++ b/python/kvikio/kvikio/benchmarks/utils.py @@ -12,6 +12,7 @@ from dask.utils import format_bytes import kvikio +import kvikio.cufile_driver import kvikio.defaults @@ -26,7 +27,7 @@ def drop_vm_cache() -> None: def pprint_sys_info() -> None: """Pretty print system information""" - props = kvikio.DriverProperties() + props = kvikio.cufile_driver.DriverProperties() try: import pynvml diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py index 8a0e625a55..8bbb68abba 100644 --- a/python/kvikio/kvikio/cufile_driver.py +++ b/python/kvikio/kvikio/cufile_driver.py @@ -6,6 +6,7 @@ from kvikio._lib import cufile_driver # type: ignore # TODO: Wrap nicely, maybe as a dataclass? +# DriverProperties = cufile_driver.DriverProperties From 51e680501d96f67dfb2cee1d13b6fd1b939005b2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Wed, 30 Oct 2024 11:43:54 +0100 Subject: [PATCH 12/14] Update cpp/include/kvikio/shim/cufile.hpp Co-authored-by: Lawrence Mitchell --- cpp/include/kvikio/shim/cufile.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 6fc99b8a82..2b8722c233 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -145,7 +145,7 @@ class cuFileAPI { /** * @brief Open the cuFile driver * - * cuFile accept multiple calls to `cufileDriverOpen()`, only the first call opens + * cuFile allows multiple calls to `cufileDriverOpen()`, only the first call opens * the driver, but every call should have a matching call to `cufileDriverClose()`. */ void driver_open() From c55c5af3361606c2dba14954434f04d41b2817cd Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 30 Oct 2024 11:44:14 +0100 Subject: [PATCH 13/14] Update python/kvikio/kvikio/cufile_driver.py Co-authored-by: Lawrence Mitchell --- python/kvikio/kvikio/cufile_driver.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py index 8bbb68abba..35909f54ab 100644 --- a/python/kvikio/kvikio/cufile_driver.py +++ b/python/kvikio/kvikio/cufile_driver.py @@ -49,6 +49,10 @@ def initialize() -> None: Normally, it is not required to open and close the cuFile driver since it is done automatically. + Notes + ----- + Registers an atexit handler that calls :func:`driver_close`. + Raises ------ RuntimeError From 6af4af55355f04c0477e548e575fa55c753a709b Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Thu, 31 Oct 2024 21:55:54 +0100 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: Vyas Ramasubramani --- cpp/include/kvikio/shim/cufile.hpp | 4 ++-- python/kvikio/kvikio/cufile_driver.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/kvikio/shim/cufile.hpp b/cpp/include/kvikio/shim/cufile.hpp index 2b8722c233..c5c7a0671f 100644 --- a/cpp/include/kvikio/shim/cufile.hpp +++ b/cpp/include/kvikio/shim/cufile.hpp @@ -110,9 +110,9 @@ class cuFileAPI { #endif // cuFile is supposed to open and close the driver automatically but - // because of a bug in cuFile v1.4 (CUDA v11.8), it sometimes segfault: + // because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults: // . - // We use the stream API as an version indicator of cuFile, it was introduced + // We use the stream API as a version indicator of cuFile since it was introduced // in cuFile v1.7 (CUDA v12.2). if (!stream_available) { driver_open(); } } diff --git a/python/kvikio/kvikio/cufile_driver.py b/python/kvikio/kvikio/cufile_driver.py index 35909f54ab..e78242a514 100644 --- a/python/kvikio/kvikio/cufile_driver.py +++ b/python/kvikio/kvikio/cufile_driver.py @@ -13,7 +13,7 @@ def driver_open() -> None: """Open the cuFile driver - cuFile accept multiple calls to `driver_open()`, only the first call + cuFile accepts multiple calls to `driver_open()`. Only the first call opens the driver, but every call must have a matching call to `driver_close()`. @@ -31,7 +31,7 @@ def driver_open() -> None: def driver_close() -> None: """Close the cuFile driver - cuFile accept multiple calls to `driver_open()`, only the first call + cuFile accepts multiple calls to `driver_open()`. Only the first call opens the driver, but every call must have a matching call to `driver_close()`.