diff --git a/python/kvikio/kvikio/_lib/remote_handle.pyx b/python/kvikio/kvikio/_lib/remote_handle.pyx index 357a965595..11563007dc 100644 --- a/python/kvikio/kvikio/_lib/remote_handle.pyx +++ b/python/kvikio/kvikio/_lib/remote_handle.pyx @@ -77,14 +77,15 @@ cdef class RemoteFile: return ret @classmethod - def open_s3_from_http_url( + def open_s3( cls, - url: str, + bucket_name: str, + object_name: str, nbytes: Optional[int], ): cdef RemoteFile ret = RemoteFile() cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint]( - _to_string(url) + _to_string(bucket_name), _to_string(object_name) ) if nbytes is None: ret._handle = make_unique[cpp_RemoteHandle](move(ep)) @@ -94,15 +95,14 @@ cdef class RemoteFile: return ret @classmethod - def open_s3( + def open_s3_from_http_url( cls, - bucket_name: str, - object_name: str, + url: str, nbytes: Optional[int], ): cdef RemoteFile ret = RemoteFile() cdef unique_ptr[cpp_S3Endpoint] ep = make_unique[cpp_S3Endpoint]( - _to_string(bucket_name), _to_string(object_name) + _to_string(url) ) if nbytes is None: ret._handle = make_unique[cpp_RemoteHandle](move(ep)) diff --git a/python/kvikio/kvikio/benchmarks/s3_io.py b/python/kvikio/kvikio/benchmarks/s3_io.py index 6130885442..4311f5f012 100644 --- a/python/kvikio/kvikio/benchmarks/s3_io.py +++ b/python/kvikio/kvikio/benchmarks/s3_io.py @@ -91,7 +91,7 @@ def run_numpy_like(args, xp): def run() -> float: t0 = time.perf_counter() - with kvikio.RemoteFile.open_s3_from_http_url(url) as f: + with kvikio.RemoteFile.open_s3_url(url) as f: res = f.read(recv) t1 = time.perf_counter() assert res == args.nbytes, f"IO mismatch, expected {args.nbytes} got {res}" diff --git a/python/kvikio/kvikio/remote_file.py b/python/kvikio/kvikio/remote_file.py index 5227126278..c4f93d86d9 100644 --- a/python/kvikio/kvikio/remote_file.py +++ b/python/kvikio/kvikio/remote_file.py @@ -75,6 +75,26 @@ def open_s3( object_name: str, nbytes: Optional[int] = None, ) -> RemoteFile: + """Open a AWS S3 file from a 
bucket name and object name. + + Please make sure to set the AWS environment variables: + - `AWS_DEFAULT_REGION` + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` + + Additionally, to override the AWS endpoint, set `AWS_ENDPOINT_URL`. + See <https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html> + + Parameters + ---------- + bucket_name + The bucket name of the file. + object_name + The object name of the file. + nbytes + The size of the file. If None, KvikIO will ask the server + for the file size. + """ return RemoteFile( _get_remote_module().RemoteFile.open_s3(bucket_name, object_name, nbytes) ) @@ -85,32 +105,39 @@ def open_s3_url( url: str, nbytes: Optional[int] = None, ) -> RemoteFile: + """Open an AWS S3 file from a URL. + + The `url` can take two forms: + - A full http url such as "http://127.0.0.1/my/file", or + - An S3 url such as "s3://<bucket>/<object>". + + Please make sure to set the AWS environment variables: + - `AWS_DEFAULT_REGION` + - `AWS_ACCESS_KEY_ID` + - `AWS_SECRET_ACCESS_KEY` + + Additionally, if `url` is an S3 url, it is possible to override the AWS endpoint + by setting `AWS_ENDPOINT_URL`. + See <https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-envvars.html> + + Parameters + ---------- + url + Either an http url or an S3 url. + nbytes + The size of the file. If None, KvikIO will ask the server + for the file size. 
+ """ url = url.lower() if url.startswith("http://") or url.startswith("https://"): - return cls.open_s3_from_http_url(url, nbytes) + return RemoteFile( + _get_remote_module().RemoteFile.open_s3_from_http_url(url, nbytes) + ) if url.startswith("s://"): - return cls.open_s3_from_s3_url(url, nbytes) - raise ValueError(f"Unsupported protocol in url: {url}") - - @classmethod - def open_s3_from_http_url( - cls, - url: str, - nbytes: Optional[int] = None, - ) -> RemoteFile: - return RemoteFile( - _get_remote_module().RemoteFile.open_s3_from_http_url(url, nbytes) - ) - - @classmethod - def open_s3_from_s3_url( - cls, - url: str, - nbytes: Optional[int] = None, - ) -> RemoteFile: - return RemoteFile( - _get_remote_module().RemoteFile.open_s3_from_s3_url(url, nbytes) - ) + return RemoteFile( + _get_remote_module().RemoteFile.open_s3_from_s3_url(url, nbytes) + ) + raise ValueError(f"Unsupported protocol: {url}") def close(self) -> None: """Close the file""" diff --git a/python/kvikio/tests/test_s3_io.py b/python/kvikio/tests/test_s3_io.py index 2daab28700..1893d6b6d2 100644 --- a/python/kvikio/tests/test_s3_io.py +++ b/python/kvikio/tests/test_s3_io.py @@ -125,7 +125,7 @@ def test_read_with_file_offset(s3_base, xp, start, end): s3_base=s3_base, bucket=bucket_name, files={object_name: bytes(a)} ) as server_address: url = f"{server_address}/{bucket_name}/{object_name}" - with kvikio.RemoteFile.open_s3_from_http_url(url) as f: + with kvikio.RemoteFile.open_s3_url(url) as f: b = xp.zeros(shape=(end - start,), dtype=xp.int64) assert f.read(b, file_offset=start * a.itemsize) == b.nbytes xp.testing.assert_array_equal(a[start:end], b)