diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 247a6f6cf8..646e676297 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -56,7 +56,7 @@ rapids_find_package(
 )
 
 rapids_find_package(
-  AWSSDK REQUIRED COMPONENTS s3
+  AWSSDK COMPONENTS s3
   BUILD_EXPORT_SET kvikio-exports
   INSTALL_EXPORT_SET kvikio-exports
 )
diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py
index 8da63e3d9a..5b7fcb4261 100644
--- a/python/kvikio/kvikio/__init__.py
+++ b/python/kvikio/kvikio/__init__.py
@@ -4,7 +4,7 @@
 from kvikio._lib import buffer, driver_properties  # type: ignore
 from kvikio._version import __git_commit__, __version__  # noqa: F401
 from kvikio.cufile import CuFile  # noqa: F401
-from kvikio.remote_file import RemoteFile  # noqa: F401
+from kvikio.remote_file import RemoteFile, is_remote_file_available  # noqa: F401
 
 
 def memory_register(buf) -> None:
diff --git a/python/kvikio/kvikio/_lib/CMakeLists.txt b/python/kvikio/kvikio/_lib/CMakeLists.txt
index 0353c9c912..a60ec79c86 100644
--- a/python/kvikio/kvikio/_lib/CMakeLists.txt
+++ b/python/kvikio/kvikio/_lib/CMakeLists.txt
@@ -14,9 +14,16 @@
 
 # Set the list of Cython files to build, one .so per file
 set(cython_modules arr.pyx libnvcomp.pyx libnvcomp_ll.pyx file_handle.pyx driver_properties.pyx
-                   future.pyx buffer.pyx defaults.pyx remote_handle.pyx
+                   future.pyx buffer.pyx defaults.pyx
 )
 
+if(AWSSDK_FOUND)
+  message(STATUS "Building remote_handle.pyx (aws-cpp-sdk-s3 found)")
+  list(APPEND cython_modules remote_handle.pyx)
+else()
+  message(WARNING "Skipping remote_handle.pyx (aws-cpp-sdk-s3 not found)")
+endif()
+
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_modules}"
diff --git a/python/kvikio/kvikio/cufile.py b/python/kvikio/kvikio/cufile.py
index 2a0560933a..ead7bc5f7a 100644
--- a/python/kvikio/kvikio/cufile.py
+++ b/python/kvikio/kvikio/cufile.py
@@ -4,9 +4,7 @@
 import pathlib
 from typing import Optional, Union
 
-from typing_extensions import Self
-
-from kvikio._lib import file_handle, remote_handle  # type: ignore
+from kvikio._lib import file_handle  # type: ignore
 
 
 class IOFutureStream:
@@ -432,33 +430,3 @@ def raw_write(
         to be a multiple of 4096 bytes. When GDS isn't used, this is less critical.
         """
         return self._handle.write(buf, size, file_offset, dev_offset)
-
-
-class RemoteFile:
-    """File handle for Remote files"""
-
-    def __init__(self, bucket_name: str, object_name: str):
-        self._handle = remote_handle.RemoteFile.from_bucket_and_object(
-            bucket_name, object_name
-        )
-
-    @classmethod
-    def from_url(cls, url: str) -> Self:
-        ret = object.__new__(cls)
-        ret._handle = remote_handle.RemoteFile.from_url(url)
-        return ret
-
-    def __enter__(self) -> "RemoteFile":
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
-        pass
-
-    def nbytes(self) -> int:
-        return self._handle.nbytes()
-
-    def pread(self, buf, size: Optional[int] = None, file_offset: int = 0) -> IOFuture:
-        return IOFuture(self._handle.pread(buf, size, file_offset))
-
-    def read(self, buf, size: Optional[int] = None, file_offset: int = 0) -> int:
-        return self.pread(buf, size, file_offset).get()
diff --git a/python/kvikio/kvikio/remote_file.py b/python/kvikio/kvikio/remote_file.py
index 4b5ad3d380..11b531c808 100644
--- a/python/kvikio/kvikio/remote_file.py
+++ b/python/kvikio/kvikio/remote_file.py
@@ -7,22 +7,42 @@
 
 from typing_extensions import Self
 
-from kvikio._lib import remote_handle  # type: ignore
 from kvikio.cufile import IOFuture
 
 
+def is_remote_file_available() -> bool:
+    """Return whether the `kvikio._lib.remote_handle` module can be imported."""
+    try:
+        import kvikio._lib.remote_handle  # noqa: F401
+    except ImportError:
+        return False
+    else:
+        return True
+
+
+def _get_remote_remote_file_class():
+    """Return `kvikio._lib.remote_handle.RemoteFile` or raise `RuntimeError`."""
+    if not is_remote_file_available():
+        raise RuntimeError(
+            "RemoteFile not available, please build KvikIO with AWS S3 support"
+        )
+    import kvikio._lib.remote_handle
+
+    return kvikio._lib.remote_handle.RemoteFile
+
+
 class RemoteFile:
     """File handle of a remote file"""
 
     def __init__(self, bucket_name: str, object_name: str):
-        self._handle = remote_handle.RemoteFile.from_bucket_and_object(
+        self._handle = _get_remote_remote_file_class().from_bucket_and_object(
             bucket_name, object_name
         )
 
     @classmethod
     def from_url(cls, url: str) -> Self:
         ret = object.__new__(cls)
-        ret._handle = remote_handle.RemoteFile.from_url(url)
+        ret._handle = _get_remote_remote_file_class().from_url(url)
         return ret
 
     def __enter__(self) -> RemoteFile:
diff --git a/python/kvikio/tests/test_aws_s3.py b/python/kvikio/tests/test_aws_s3.py
index d5f782c3a8..ce2c358976 100644
--- a/python/kvikio/tests/test_aws_s3.py
+++ b/python/kvikio/tests/test_aws_s3.py
@@ -14,6 +14,12 @@
 moto = pytest.importorskip("moto", minversion="3.1.6")
 boto3 = pytest.importorskip("boto3")
 
+if not kvikio.is_remote_file_available():
+    pytest.skip(
+        "cannot test remote IO, please build KvikIO with AWS S3 support",
+        allow_module_level=True,
+    )
+
 ThreadedMotoServer = pytest.importorskip("moto.server").ThreadedMotoServer
 
 
diff --git a/python/kvikio/tests/test_benchmarks.py b/python/kvikio/tests/test_benchmarks.py
index c585f9c9b7..30cee08be9 100644
--- a/python/kvikio/tests/test_benchmarks.py
+++ b/python/kvikio/tests/test_benchmarks.py
@@ -8,6 +8,8 @@
 
 import pytest
 
+import kvikio
+
 benchmarks_path = (
     Path(os.path.realpath(__file__)).parent.parent / "kvikio" / "benchmarks"
 )
@@ -92,6 +94,8 @@ def test_zarr_io(run_cmd, tmp_path, api):
 def test_aws_s3_io(run_cmd, api):
     """Test benchmarks/aws_s3_io.py"""
 
+    if not kvikio.is_remote_file_available():
+        pytest.skip("cannot test remote IO, please build KvikIO with AWS S3 support")
     pytest.importorskip("boto3")
     pytest.importorskip("moto")
     if "cudf" in api: