Skip to content

Commit

Permalink
from_http_url
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Sep 20, 2024
1 parent 6398ddc commit 53a0acc
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 9 deletions.
30 changes: 26 additions & 4 deletions python/kvikio/kvikio/remote_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,39 @@ def _get_remote_module():


class RemoteFile:
"""File handle of a remote file (currently, only AWS S3 is supported)."""
"""File handle of a remote file."""

def __init__(self, url: str, nbytes: Optional[int] = None):
"""Open a remote file given a bucket and object name.
def __init__(self, handle):
"""Create a remote file from a Cython handle.
This constructor should not be called directly; instead, use a
factory method such as `RemoteFile.from_http_url()`.
Parameters
----------
handle : kvikio._lib.remote_handle.RemoteFile
The Cython handle
"""
assert isinstance(handle, _get_remote_module().RemoteFile)
self._handle = handle

@classmethod
def from_http_url(
cls,
url: str,
nbytes: Optional[int] = None,
) -> RemoteFile:
"""Open a http file.
Parameters
----------
url
URL to the remote file.
nbytes
The size of the file. If None, KvikIO will ask the server
for the file size.
"""
self._handle = _get_remote_module().RemoteFile.from_url(url, nbytes)
return RemoteFile(_get_remote_module().RemoteFile.from_url(url, nbytes))

def __enter__(self) -> RemoteFile:
return self
Expand Down
10 changes: 5 additions & 5 deletions python/kvikio/tests/test_http_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def http_server(request, tmpdir):
def test_file_size(http_server, tmpdir):
a = np.arange(100)
a.tofile(tmpdir / "a")
with kvikio.RemoteFile(f"{http_server}/a") as f:
with kvikio.RemoteFile.from_http_url(f"{http_server}/a") as f:
assert f.nbytes() == a.nbytes


Expand All @@ -64,7 +64,7 @@ def test_read(http_server, tmpdir, xp, size, nthreads, tasksize):

with kvikio.defaults.set_num_threads(nthreads):
with kvikio.defaults.set_task_size(tasksize):
with kvikio.RemoteFile(f"{http_server}/a") as f:
with kvikio.RemoteFile.from_http_url(f"{http_server}/a") as f:
assert f.nbytes() == a.nbytes
b = xp.empty_like(a)
assert f.read(b) == a.nbytes
Expand All @@ -77,7 +77,7 @@ def test_large_read(http_server, tmpdir, xp, nthreads):
a.tofile(tmpdir / "a")

with kvikio.defaults.set_num_threads(nthreads):
with kvikio.RemoteFile(f"{http_server}/a") as f:
with kvikio.RemoteFile.from_http_url(f"{http_server}/a") as f:
assert f.nbytes() == a.nbytes
b = xp.empty_like(a)
assert f.read(b) == a.nbytes
Expand All @@ -88,7 +88,7 @@ def test_error_too_small_file(http_server, tmpdir, xp):
a = xp.arange(10, dtype="uint8")
b = xp.empty(100, dtype="uint8")
a.tofile(tmpdir / "a")
with kvikio.RemoteFile(f"{http_server}/a") as f:
with kvikio.RemoteFile.from_http_url(f"{http_server}/a") as f:
assert f.nbytes() == a.nbytes
with pytest.raises(
ValueError, match=r"cannot read 0\+100 bytes into a 10 bytes file"
Expand All @@ -105,7 +105,7 @@ def test_no_range_support(http_server, tmpdir, xp):
a = xp.arange(100, dtype="uint8")
a.tofile(tmpdir / "a")
b = xp.empty_like(a)
with kvikio.RemoteFile(f"{http_server}/a") as f:
with kvikio.RemoteFile.from_http_url(f"{http_server}/a") as f:
assert f.nbytes() == a.nbytes
with pytest.raises(
OverflowError, match="maybe the server doesn't support file ranges?"
Expand Down

0 comments on commit 53a0acc

Please sign in to comment.