Skip to content

Commit

Permalink
handle pycapsule interface objects in write_deltalake
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed May 21, 2024
1 parent 319229d commit 77d03f0
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion python/deltalake/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
import pyarrow as pa
import pyarrow.dataset as ds
import pyarrow.fs as pa_fs
from pyarrow.lib import RecordBatchReader
from pyarrow import RecordBatchReader

from ._internal import DeltaDataChecker as _DeltaDataChecker
from ._internal import batch_distinct
Expand Down Expand Up @@ -300,6 +300,17 @@ def write_deltalake(
data = convert_pyarrow_table(
pa.Table.from_pandas(data), large_dtypes=large_dtypes
)
elif hasattr(data, "__arrow_c_array__"):
data = convert_pyarrow_recordbatch(pa.record_batch(data), large_dtypes)
elif hasattr(data, "__arrow_c_stream__"):
if not hasattr(RecordBatchReader, "from_stream"):
raise ValueError(
"pyarrow 15 or later required to read stream via pycapsule interface"
)

data = convert_pyarrow_recordbatchreader(
RecordBatchReader.from_stream(data), large_dtypes
)
elif isinstance(data, Iterable):
if schema is None:
raise ValueError("You must provide schema if data is Iterable")
Expand Down

0 comments on commit 77d03f0

Please sign in to comment.