diff --git a/.github/workflows/test_wheel.yaml b/.github/workflows/test_wheel.yaml index ad74ba06..ff03aff0 100644 --- a/.github/workflows/test_wheel.yaml +++ b/.github/workflows/test_wheel.yaml @@ -38,7 +38,7 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" @@ -50,7 +50,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1.3 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -58,7 +58,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - name: Install wheels shell: bash -l {0} diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index d1b97832..e3cbcb21 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -32,9 +32,9 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -44,7 +44,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -52,7 +52,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - uses: actions/upload-artifact@v3 with: @@ -87,9 +87,9 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -99,7 +99,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -107,7 +107,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - uses: actions/upload-artifact@v3 with: @@ -142,9 +142,9 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -154,7 +154,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -162,7 +162,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - uses: actions/upload-artifact@v3 with: @@ -197,9 +197,9 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Set up QEMU if: runner.os == 'Linux' @@ -209,7 +209,7 @@ jobs: - name: Add msbuild to PATH if: runner.os == 'Windows' - uses: microsoft/setup-msbuild@v1 + uses: microsoft/setup-msbuild@v2 - name: delvewheel install if: runner.os == 'Windows' @@ -217,7 +217,7 @@ jobs: python -m pip install delvewheel cython - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - uses: actions/upload-artifact@v3 with: @@ -246,12 +246,12 @@ jobs: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - name: Build wheels - uses: joerick/cibuildwheel@v2.16.2 + uses: joerick/cibuildwheel@v2.16.5 - uses: actions/upload-artifact@v3 with: diff --git a/fastparquet/__init__.py b/fastparquet/__init__.py index 03ac88ae..17e7f4b6 100755 --- a/fastparquet/__init__.py +++ b/fastparquet/__init__.py @@ -1,8 +1,8 @@ """parquet - read parquet files.""" -from ._version import __version__ -from .writer import write, update_file_custom_metadata -from . import core, schema, converted_types, api -from .api import ParquetFile -from .util import ParquetException +from fastparquet._version import __version__ +from fastparquet.writer import write, update_file_custom_metadata +from fastparquet import core, schema, converted_types, api +from fastparquet.api import ParquetFile +from fastparquet.util import ParquetException diff --git a/fastparquet/api.py b/fastparquet/api.py index 53caba12..f6e55426 100644 --- a/fastparquet/api.py +++ b/fastparquet/api.py @@ -6,16 +6,15 @@ import numpy as np import fsspec -from fastparquet.util import join_path import pandas as pd -from . import core, schema, converted_types, encoding, dataframe, writer -from . import parquet_thrift -from .cencoding import ThriftObject, from_buffer -from .json import json_decoder -from .util import (default_open, default_remove, ParquetException, val_to_num, +from fastparquet import core, schema, converted_types, encoding, dataframe, writer +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject, from_buffer +from fastparquet.json import json_decoder +from fastparquet.util import (default_open, default_remove, ParquetException, val_to_num, ops, ensure_bytes, ensure_str, check_column_names, metadata_from_many, - ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION) + ex_from_sep, _strip_path_tail, get_fs, PANDAS_VERSION, join_path) # Find in names of partition files the integer matching "**part.*.parquet", diff --git a/fastparquet/compression.py b/fastparquet/compression.py index f62fc72f..01188d70 100644 --- a/fastparquet/compression.py +++ b/fastparquet/compression.py @@ -1,7 +1,7 @@ import cramjam import numpy as np -from . import parquet_thrift +from fastparquet import parquet_thrift # TODO: use stream/direct-to-buffer conversions instead of memcopy diff --git a/fastparquet/converted_types.py b/fastparquet/converted_types.py index cd8858e7..d21a5e37 100644 --- a/fastparquet/converted_types.py +++ b/fastparquet/converted_types.py @@ -10,9 +10,9 @@ import numpy as np import pandas as pd -from . import parquet_thrift -from .cencoding import time_shift -from .json import json_decoder +from fastparquet import parquet_thrift +from fastparquet.cencoding import time_shift +from fastparquet.json import json_decoder logger = logging.getLogger('parquet') # pylint: disable=invalid-name diff --git a/fastparquet/core.py b/fastparquet/core.py index 3facf893..79c17762 100644 --- a/fastparquet/core.py +++ b/fastparquet/core.py @@ -1,17 +1,16 @@ -import warnings import numpy as np import pandas as pd -from . import encoding -from . encoding import read_plain +from fastparquet import encoding +from fastparquet.encoding import read_plain import fastparquet.cencoding as encoding -from .compression import decompress_data, rev_map, decom_into -from .converted_types import convert, simple, converts_inplace -from .schema import _is_list_like, _is_map_like -from .speedups import unpack_byte_array -from . import parquet_thrift -from .cencoding import ThriftObject, read_thrift -from .util import val_to_num, ex_from_sep +from fastparquet.compression import decompress_data, rev_map, decom_into +from fastparquet.converted_types import convert, simple, converts_inplace +from fastparquet.schema import _is_list_like, _is_map_like +from fastparquet.speedups import unpack_byte_array +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject +from fastparquet.util import val_to_num def _read_page(file_obj, page_header, column_metadata): diff --git a/fastparquet/dataframe.py b/fastparquet/dataframe.py index 51ba40c4..afa7d01b 100644 --- a/fastparquet/dataframe.py +++ b/fastparquet/dataframe.py @@ -11,7 +11,7 @@ from pandas.core.arrays.masked import BaseMaskedDtype import warnings -from .util import PANDAS_VERSION +from fastparquet.util import PANDAS_VERSION class Dummy(object): diff --git a/fastparquet/encoding.py b/fastparquet/encoding.py index 3525c18f..8e69c344 100755 --- a/fastparquet/encoding.py +++ b/fastparquet/encoding.py @@ -1,8 +1,8 @@ """encoding.py - methods for reading parquet encoded data blocks.""" import numpy as np -from .cencoding import read_bitpacked1, NumpyIO -from .speedups import unpack_byte_array -from . import parquet_thrift +from fastparquet.cencoding import read_bitpacked1, NumpyIO +from fastparquet.speedups import unpack_byte_array +from fastparquet import parquet_thrift def read_plain_boolean(raw_bytes, count, out=None): diff --git a/fastparquet/evolve.py b/fastparquet/evolve.py new file mode 100644 index 00000000..e69de29b diff --git a/fastparquet/parquet_thrift/__init__.py b/fastparquet/parquet_thrift/__init__.py index c71820ef..85860548 100644 --- a/fastparquet/parquet_thrift/__init__.py +++ b/fastparquet/parquet_thrift/__init__.py @@ -4,7 +4,7 @@ def __getattr__(name): # for compatability with coe that calls, e.g., parquet_thrift.RowGroup(...) - from ..cencoding import ThriftObject + from fastparquet.cencoding import ThriftObject if name[0].isupper(): return partial(ThriftObject.from_fields, thrift_name=name) raise AttributeError(name) diff --git a/fastparquet/schema.py b/fastparquet/schema.py index 2a0e988e..83a43afd 100755 --- a/fastparquet/schema.py +++ b/fastparquet/schema.py @@ -1,7 +1,7 @@ """Utils for working with the parquet thrift models.""" from collections import OrderedDict -from . import parquet_thrift +from fastparquet import parquet_thrift def schema_tree(schema, i=0): diff --git a/fastparquet/thrift_structures.py b/fastparquet/thrift_structures.py index 75410a4a..b8ae51d2 100644 --- a/fastparquet/thrift_structures.py +++ b/fastparquet/thrift_structures.py @@ -1,2 +1,5 @@ -from . import parquet_thrift -from .cencoding import ThriftObject +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject + + +__all__ = ["ThriftObject", "parquet_thrift"] diff --git a/fastparquet/util.py b/fastparquet/util.py index 4e1c3115..7e93f37b 100644 --- a/fastparquet/util.py +++ b/fastparquet/util.py @@ -14,8 +14,8 @@ import fsspec -from . import parquet_thrift -from .cencoding import ThriftObject +from fastparquet import parquet_thrift +from fastparquet.cencoding import ThriftObject from fastparquet import __version__ PANDAS_VERSION = Version(pd.__version__) diff --git a/fastparquet/writer.py b/fastparquet/writer.py index 873d6954..b023eb2e 100644 --- a/fastparquet/writer.py +++ b/fastparquet/writer.py @@ -11,19 +11,17 @@ from fastparquet.util import join_path -from . import parquet_thrift -from .api import ParquetFile, partitions, part_ids -from .compression import compress_data -from .converted_types import tobson -from .json import json_encoder -from .util import (default_open, default_mkdirs, check_column_names, +from fastparquet import parquet_thrift, __version__, cencoding +from fastparquet.api import ParquetFile, partitions, part_ids +from fastparquet.compression import compress_data +from fastparquet.converted_types import tobson +from fastparquet.json import json_encoder +from fastparquet.util import (default_open, default_mkdirs, check_column_names, created_by, get_column_metadata, norm_col_name, path_string, reset_row_idx, get_fs, update_custom_metadata) -from . import __version__ -from .speedups import array_encode_utf8, pack_byte_array -from . import cencoding -from .cencoding import NumpyIO, ThriftObject, from_buffer +from fastparquet.speedups import array_encode_utf8, pack_byte_array +from fastparquet.cencoding import NumpyIO, ThriftObject, from_buffer from decimal import Decimal MARKER = b'PAR1'