Skip to content

Commit

Permalink
Python bindings to cuFileDriverOpen() and cuFileDriverClose() (#514)
Browse files Browse the repository at this point in the history
Changes:

- Adding Python bindings to `cuFileDriverOpen()` and `cuFileDriverClose()`. 
- We now [only open the cufile driver explicitly](#160) in CUDA versions older than v12.2.
- Introducing `kvikio.cufile_driver.initialize()`, which open the cuFile driver and close it again at module exit.
- Let CI fail if KvikIO wasn't built with cuFile support.
  * Except on cuda11.8+arm64; cuFile didn't support arm until cuda v12.4.
- Some refactor and clean up!

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #514
  • Loading branch information
madsbk authored Nov 1, 2024
1 parent fc56da6 commit c9209aa
Show file tree
Hide file tree
Showing 17 changed files with 178 additions and 38 deletions.
7 changes: 6 additions & 1 deletion ci/run_pytests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,9 @@ set -euo pipefail
# Support invoking run_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/kvikio

pytest --cache-clear --verbose "$@" tests
# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since
# cuFile didn't support arm until 12.4
[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \
&& PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=()

pytest --cache-clear --verbose "${PYTEST_MARK[@]}" "$@" tests
7 changes: 6 additions & 1 deletion ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,9 @@ RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-fr

python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]"

python -m pytest ./python/kvikio/tests
# If running CUDA 11.8 on arm64, we skip tests marked "cufile" since
# cuFile didn't support arm until 12.4
[[ "${CUDA_VERSION}" == "11.8.0" && "${RUNNER_ARCH}" == "ARM64" ]] \
&& PYTEST_MARK=( -m 'not cufile' ) || PYTEST_MARK=()

python -m pytest --cache-clear --verbose "${PYTEST_MARK[@]}" ./python/kvikio/tests
2 changes: 1 addition & 1 deletion cpp/examples/basic_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

#include <kvikio/batch.hpp>
#include <kvikio/buffer.hpp>
#include <kvikio/cufile/driver.hpp>
#include <kvikio/defaults.hpp>
#include <kvikio/driver.hpp>
#include <kvikio/error.hpp>
#include <kvikio/file_handle.hpp>

Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic_no_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

#include <kvikio/batch.hpp>
#include <kvikio/buffer.hpp>
#include <kvikio/cufile/driver.hpp>
#include <kvikio/defaults.hpp>
#include <kvikio/driver.hpp>
#include <kvikio/error.hpp>
#include <kvikio/file_handle.hpp>

Expand Down
18 changes: 17 additions & 1 deletion cpp/examples/downstream/downstream_example.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <iostream>

#include <kvikio/cufile/driver.hpp>
#include <kvikio/defaults.hpp>
#include <kvikio/driver.hpp>

using namespace std;

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,7 +45,7 @@ inline void set_driver_flag(unsigned int& prop, unsigned int flag, bool val) noe
class DriverInitializer {
// Optional, if not used cuFiles opens the driver automatically
public:
DriverInitializer() { CUFILE_TRY(cuFileAPI::instance().DriverOpen()); }
DriverInitializer() { cuFileAPI::instance().driver_open(); }

DriverInitializer(DriverInitializer const&) = delete;
DriverInitializer& operator=(DriverInitializer const&) = delete;
Expand All @@ -55,7 +55,7 @@ class DriverInitializer {
~DriverInitializer()
{
try {
CUFILE_TRY(cuFileAPI::instance().DriverClose());
cuFileAPI::instance().driver_close();
} catch (const CUfileException& e) {
std::cerr << "Unable to close GDS file driver: ";
std::cerr << e.what();
Expand Down
5 changes: 1 addition & 4 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,12 @@

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <optional>
#include <stdexcept>
#include <system_error>
#include <utility>

#include <kvikio/buffer.hpp>
#include <kvikio/cufile_config.hpp>
#include <kvikio/cufile/config.hpp>
#include <kvikio/defaults.hpp>
#include <kvikio/error.hpp>
#include <kvikio/parallel_operation.hpp>
Expand Down
65 changes: 48 additions & 17 deletions cpp/include/kvikio/shim/cufile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#pragma once

#include <stdexcept>
#include <string>

#include <iostream>
#include <kvikio/shim/cufile_h_wrapper.hpp>
#include <kvikio/shim/utils.hpp>

Expand All @@ -38,8 +38,6 @@ class cuFileAPI {
decltype(cuFileWrite)* Write{nullptr};
decltype(cuFileBufRegister)* BufRegister{nullptr};
decltype(cuFileBufDeregister)* BufDeregister{nullptr};
decltype(cuFileDriverOpen)* DriverOpen{nullptr};
decltype(cuFileDriverClose)* DriverClose{nullptr};
decltype(cuFileDriverGetProperties)* DriverGetProperties{nullptr};
decltype(cuFileDriverSetPollMode)* DriverSetPollMode{nullptr};
decltype(cuFileDriverSetMaxCacheSize)* DriverSetMaxCacheSize{nullptr};
Expand All @@ -54,6 +52,12 @@ class cuFileAPI {
decltype(cuFileStreamRegister)* StreamRegister{nullptr};
decltype(cuFileStreamDeregister)* StreamDeregister{nullptr};

private:
// Don't call driver open and close directly, use `.driver_open()` and `.driver_close()`.
decltype(cuFileDriverOpen)* DriverOpen{nullptr};
decltype(cuFileDriverClose)* DriverClose{nullptr};

public:
bool stream_available = false;

private:
Expand Down Expand Up @@ -105,25 +109,25 @@ class cuFileAPI {
}
#endif

// cuFile is supposed to open and close the driver automatically but because of a bug in
// CUDA 11.8, it sometimes segfault. See <https://github.com/rapidsai/kvikio/issues/159>.
CUfileError_t const error = DriverOpen();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"cuFile error at: "} + __FILE__ + ":" +
KVIKIO_STRINGIFY(__LINE__) + ": " +
cufileop_status_error(error.err));
}
// cuFile is supposed to open and close the driver automatically but
// because of a bug in cuFile v1.4 (CUDA v11.8) it sometimes segfaults:
// <https://github.com/rapidsai/kvikio/issues/159>.
// We use the stream API as a version indicator of cuFile since it was introduced
// in cuFile v1.7 (CUDA v12.2).
if (!stream_available) { driver_open(); }
}

// Notice, we have to close the driver at program exit (if we opened it) even though we are
// not allowed to call CUDA after main[1]. This is because, cuFile will segfault if the
// driver isn't closed on program exit i.e. we are doomed if we do, doomed if we don't, but
// this seems to be the lesser of two evils.
// [1] <https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#initialization>
~cuFileAPI()
{
CUfileError_t const error = DriverClose();
if (error.err != CU_FILE_SUCCESS) {
std::cerr << "Unable to close GDS file driver: " << cufileop_status_error(error.err)
<< std::endl;
}
if (!stream_available) { driver_close(); }
}
#else
cuFileAPI() { throw std::runtime_error(CUFILE_ERRSTR(0)); }
cuFileAPI() { throw std::runtime_error("KvikIO not compiled with cuFile.h"); }
#endif

public:
Expand All @@ -137,6 +141,33 @@ class cuFileAPI {
static cuFileAPI _instance;
return _instance;
}

/**
* @brief Open the cuFile driver
*
* cuFile allows multiple calls to `cufileDriverOpen()`, only the first call opens
* the driver, but every call should have a matching call to `cufileDriverClose()`.
*/
void driver_open()
{
CUfileError_t const error = DriverOpen();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"Unable to open GDS file driver: "} +
cufileop_status_error(error.err));
}
}

/**
* @brief Close the cuFile driver
*/
void driver_close()
{
CUfileError_t const error = DriverClose();
if (error.err != CU_FILE_SUCCESS) {
throw std::runtime_error(std::string{"Unable to close GDS file driver: "} +
cufileop_status_error(error.err));
}
}
};

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/kvikio/stream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
#pragma once

#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <kvikio/error.hpp>
#include <kvikio/shim/cuda.hpp>
#include <kvikio/shim/cufile.hpp>
Expand Down
5 changes: 0 additions & 5 deletions python/kvikio/kvikio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

from kvikio._lib import driver_properties # type: ignore
from kvikio._version import __git_commit__, __version__
from kvikio.cufile import CuFile
from kvikio.remote_file import RemoteFile, is_remote_file_available

# TODO: Wrap nicely, maybe as a dataclass?
DriverProperties = driver_properties.DriverProperties


__all__ = [
"__git_commit__",
"__version__",
Expand Down
2 changes: 1 addition & 1 deletion python/kvikio/kvikio/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# =============================================================================

# Set the list of Cython files to build, one .so per file
set(cython_modules arr.pyx buffer.pyx defaults.pyx driver_properties.pyx file_handle.pyx future.pyx
set(cython_modules arr.pyx buffer.pyx defaults.pyx cufile_driver.pyx file_handle.pyx future.pyx
libnvcomp.pyx libnvcomp_ll.pyx
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,20 @@
from libcpp cimport bool


cdef extern from "<kvikio/driver.hpp>" nogil:
cdef extern from "<kvikio/shim/cufile.hpp>" nogil:
cdef void cpp_driver_open "kvikio::cuFileAPI::instance().driver_open"() except +
cdef void cpp_driver_close "kvikio::cuFileAPI::instance().driver_close"() except +


def driver_open():
cpp_driver_open()


def driver_close():
cpp_driver_close()


cdef extern from "<kvikio/cufile/driver.hpp>" nogil:
cdef cppclass cpp_DriverProperties "kvikio::DriverProperties":
cpp_DriverProperties() except +
bool is_gds_available() except +
Expand Down
3 changes: 2 additions & 1 deletion python/kvikio/kvikio/benchmarks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from dask.utils import format_bytes

import kvikio
import kvikio.cufile_driver
import kvikio.defaults


Expand All @@ -26,7 +27,7 @@ def drop_vm_cache() -> None:
def pprint_sys_info() -> None:
"""Pretty print system information"""

props = kvikio.DriverProperties()
props = kvikio.cufile_driver.DriverProperties()
try:
import pynvml

Expand Down
62 changes: 62 additions & 0 deletions python/kvikio/kvikio/cufile_driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

import atexit

from kvikio._lib import cufile_driver # type: ignore

# TODO: Wrap nicely, maybe as a dataclass?
# <https://github.com/rapidsai/kvikio/issues/526>
DriverProperties = cufile_driver.DriverProperties


def driver_open() -> None:
"""Open the cuFile driver
cuFile accepts multiple calls to `driver_open()`. Only the first call
opens the driver, but every call must have a matching call to
`driver_close()`.
Normally, it is not required to open and close the cuFile driver since
it is done automatically.
Raises
------
RuntimeError
If cuFile isn't available.
"""
return cufile_driver.driver_open()


def driver_close() -> None:
"""Close the cuFile driver
cuFile accepts multiple calls to `driver_open()`. Only the first call
opens the driver, but every call must have a matching call to
`driver_close()`.
Raises
------
RuntimeError
If cuFile isn't available.
"""
return cufile_driver.driver_close()


def initialize() -> None:
"""Open the cuFile driver and close it again at module exit
Normally, it is not required to open and close the cuFile driver since
it is done automatically.
Notes
-----
Registers an atexit handler that calls :func:`driver_close`.
Raises
------
RuntimeError
If cuFile isn't available.
"""
driver_open()
atexit.register(driver_close)
3 changes: 3 additions & 0 deletions python/kvikio/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,6 @@ filterwarnings = [
"ignore:Jitify is performing a one-time only warm-up to populate the persistent cache",
"ignore::DeprecationWarning:botocore.*",
]
markers = [
"cufile: tests to skip if cuFile isn't available e.g. run with `pytest -m 'not cufile'`"
]
12 changes: 12 additions & 0 deletions python/kvikio/tests/test_cufile_driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# See file LICENSE for terms.

import pytest

import kvikio.cufile_driver


@pytest.mark.cufile
def test_open_and_close():
kvikio.cufile_driver.driver_open()
kvikio.cufile_driver.driver_close()

0 comments on commit c9209aa

Please sign in to comment.