Skip to content

Commit

Permalink
feat: support for writing to buffers
Browse files Browse the repository at this point in the history
  • Loading branch information
felixscherz committed Aug 28, 2024
1 parent 36ed695 commit 92c2449
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
9 changes: 9 additions & 0 deletions fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1546,3 +1546,12 @@ def test_read_a_non_pandas_parquet_file(tempdir):

assert parquet_file.count() == 2
assert parquet_file.head(1).equals(pd.DataFrame({"foo": [0], "bar": ["a"]}))


def test_writing_to_buffer_does_not_close():
    """Writing a simple-scheme parquet file into an in-memory buffer must
    leave that buffer open, so the caller can still read the data back."""
    frame = pd.DataFrame({"val": [1, 2]})
    sink = io.BytesIO()
    write(sink, frame, file_scheme="simple")
    # The buffer is caller-owned: write() must not close it behind our back.
    assert sink.closed is False
    # Round-trip check: the same buffer is still a readable parquet file.
    assert ParquetFile(sink).count() == 2
14 changes: 9 additions & 5 deletions fastparquet/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,11 +957,8 @@ def write_simple(fn, data, fmd, row_group_offsets=None, compression=None,
if isinstance(data, pd.DataFrame):
data = iter_dataframe(data, row_group_offsets)
mode = 'rb+' if append else 'wb'
if hasattr(fn, "write"):
of = fn
else:
of = open_with(fn, mode)
with of as f:

def write_to_file(f):
if append:
f.seek(-8, 2)
head_size = struct.unpack('<I', f.read(4))[0]
Expand All @@ -981,6 +978,13 @@ def write_simple(fn, data, fmd, row_group_offsets=None, compression=None,
f.write(struct.pack(b"<I", foot_size))
f.write(MARKER)

if hasattr(fn, "write"):
write_to_file(fn)
else:
of = open_with(fn, mode)
with of as f:
write_to_file(f)


def write_multi(dn, data, fmd, row_group_offsets=None, compression=None,
file_scheme='hive', write_fmd=True, open_with=default_open,
Expand Down

0 comments on commit 92c2449

Please sign in to comment.