From 5859f29da3c93c6f9122d4f0272e491be79d9893 Mon Sep 17 00:00:00 2001 From: manu-sj Date: Thu, 5 Sep 2024 19:34:15 +0200 Subject: [PATCH 1/8] making polars an optional dependency --- python/hsfs/core/arrow_flight_client.py | 11 +- python/hsfs/core/constants.py | 28 +++++ python/hsfs/core/vector_server.py | 33 ++++-- python/hsfs/engine/python.py | 130 ++++++++++++++++-------- python/hsfs/feature_group.py | 15 ++- python/hsfs/feature_store.py | 7 +- python/hsfs/feature_view.py | 54 ++++++---- python/hsfs/storage_connector.py | 10 +- python/pyproject.toml | 2 +- python/tests/engine/test_python.py | 67 +++++++++++- 10 files changed, 273 insertions(+), 84 deletions(-) create mode 100644 python/hsfs/core/constants.py diff --git a/python/hsfs/core/arrow_flight_client.py b/python/hsfs/core/arrow_flight_client.py index f8dbb7d992..cd5f61844c 100644 --- a/python/hsfs/core/arrow_flight_client.py +++ b/python/hsfs/core/arrow_flight_client.py @@ -23,19 +23,23 @@ from functools import wraps from typing import Any, Dict, Optional, Union -import polars as pl import pyarrow import pyarrow._flight import pyarrow.flight from hsfs import client, feature_group, util from hsfs.client.exceptions import FeatureStoreException from hsfs.constructor import query +from hsfs.core.constants import HAS_POLARS, polars_not_installed_message from hsfs.core.variable_api import VariableApi from hsfs.storage_connector import StorageConnector from pyarrow.flight import FlightServerError from retrying import retry +if HAS_POLARS: + import polars as pl + + _logger = logging.getLogger(__name__) @@ -399,7 +403,10 @@ def _get_dataset(self, descriptor, timeout=None, dataframe_type="pandas"): reader = self._connection.do_get(info.endpoints[0].ticket, options) _logger.debug("Dataset fetched. Converting to dataframe %s.", dataframe_type) if dataframe_type.lower() == "polars": - return pl.from_arrow(reader.read_all()) + if HAS_POLARS: + return pl.from_arrow(reader.read_all()) + else: + raise ModuleNotFoundError(polars_not_installed_message) else: return reader.read_pandas() diff --git a/python/hsfs/core/constants.py b/python/hsfs/core/constants.py new file mode 100644 index 0000000000..cdff9369fc --- /dev/null +++ b/python/hsfs/core/constants.py @@ -0,0 +1,28 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import importlib.util + + +polars_not_installed_message = ( + "Polars package not found. " + "If you want to use Polars with Hopsworks you can install the corresponding extras " + """`pip install hopsworks[polars]` or `pip install "hopsworks[polars]"` if using zsh. """ + "You can also install polars directly in your environment e.g `pip install polars`. " + "You will need to restart your kernel if applicable." 
+) + +HAS_POLARS: bool = importlib.util.find_spec("polars") is not None diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index 9eca5dd3cd..1f27406650 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -20,13 +20,23 @@ from base64 import b64decode from datetime import datetime, timezone from io import BytesIO -from typing import Any, Callable, Dict, List, Literal, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Literal, + Optional, + Set, + Tuple, + Union, +) import avro.io import avro.schema import numpy as np import pandas as pd -import polars as pl from hsfs import ( client, feature_view, @@ -49,6 +59,7 @@ from hsfs.core import ( transformation_function_engine as tf_engine_mod, ) +from hsfs.core.constants import HAS_POLARS, polars_not_installed_message HAS_FASTAVRO = False @@ -59,6 +70,9 @@ except ImportError: from avro.io import BinaryDecoder +if HAS_POLARS or TYPE_CHECKING: + import polars as pl + _logger = logging.getLogger(__name__) @@ -487,11 +501,16 @@ def handle_feature_vector_return_type( return pandas_df elif return_type.lower() == "polars": _logger.debug("Returning feature vector as polars dataframe") - return pl.DataFrame( - feature_vectorz if batch else [feature_vectorz], - schema=self._feature_vector_col_name if not inference_helper else None, - orient="row", - ) + if HAS_POLARS: + return pl.DataFrame( + feature_vectorz if batch else [feature_vectorz], + schema=self._feature_vector_col_name + if not inference_helper + else None, + orient="row", + ) + else: + raise ModuleNotFoundError(polars_not_installed_message) else: raise ValueError( f"""Unknown return type. Supported return types are {"'list', 'numpy'" if not inference_helper else "'dict'"}, 'polars' and 'pandas''""" diff --git a/python/hsfs/engine/python.py b/python/hsfs/engine/python.py index 8e64e6ec95..c9e3f3fa0d 100644 --- a/python/hsfs/engine/python.py +++ b/python/hsfs/engine/python.py @@ -30,7 +30,17 @@ from datetime import datetime, timezone from io import BytesIO from pathlib import Path -from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Literal, + Optional, + Tuple, + Union, +) import avro import boto3 @@ -38,7 +48,6 @@ import hsfs import numpy as np import pandas as pd -import polars as pl import pyarrow as pa import pytz from botocore.response import StreamingBody @@ -70,6 +79,7 @@ transformation_function_engine, variable_api, ) +from hsfs.core.constants import HAS_POLARS, polars_not_installed_message from hsfs.core.vector_db_client import VectorDbClient from hsfs.feature_group import ExternalFeatureGroup, FeatureGroup from hsfs.training_dataset import TrainingDataset @@ -84,6 +94,9 @@ # Disable pyhive INFO logging logging.getLogger("pyhive").setLevel(logging.WARNING) +if HAS_POLARS or TYPE_CHECKING: + import polars as pl + HAS_FAST = False try: from fastavro import schemaless_writer @@ -206,7 +219,6 @@ def _sql_offline( hive_config: Optional[Dict[str, Any]] = None, arrow_flight_config: Optional[Dict[str, Any]] = None, ) -> Union[pd.DataFrame, pl.DataFrame]: - self._validate_dataframe_type(dataframe_type) if isinstance(sql_query, dict) and "query_string" in sql_query: result_df = util.run_with_loading_animation( @@ -224,12 +236,15 @@ def _sql_offline( with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) if dataframe_type.lower() == "polars": - result_df = 
util.run_with_loading_animation( - "Reading data from Hopsworks, using Hive", - pl.read_database, - sql_query, - hive_conn, - ) + if HAS_POLARS: + result_df = util.run_with_loading_animation( + "Reading data from Hopsworks, using Hive", + pl.read_database, + sql_query, + hive_conn, + ) + else: + raise ModuleNotFoundError(polars_not_installed_message) else: result_df = util.run_with_loading_animation( "Reading data from Hopsworks, using Hive", @@ -264,7 +279,10 @@ def _jdbc( if "sqlalchemy" in str(type(mysql_conn)): sql_query = sql.text(sql_query) if dataframe_type.lower() == "polars": - result_df = pl.read_database(sql_query, mysql_conn) + if HAS_POLARS: + result_df = pl.read_database(sql_query, mysql_conn) + else: + raise ModuleNotFoundError(polars_not_installed_message) else: result_df = pd.read_sql(sql_query, mysql_conn) if schema: @@ -300,6 +318,8 @@ def read( # Below check performed since some files materialized when creating training data are empty # If empty dataframe is in df_list then polars cannot concatenate df_list due to schema mismatch # However if the entire split contains only empty files which can occur when the data size is very small then one of the empty dataframe is return so that the column names can be accessed. + if not HAS_POLARS: + raise ModuleNotFoundError(polars_not_installed_message) non_empty_df_list = [df for df in df_list if not df.is_empty()] if non_empty_df_list: return self._return_dataframe_type( @@ -329,6 +349,8 @@ def _read_pandas(self, data_format: str, obj: Any) -> pd.DataFrame: ) def _read_polars(self, data_format: str, obj: Any) -> pl.DataFrame: + if not HAS_POLARS: + raise ModuleNotFoundError(polars_not_installed_message) if data_format.lower() == "csv": return pl.read_csv(obj) elif data_format.lower() == "tsv": @@ -510,13 +532,20 @@ def show( sql_query, feature_store, online_conn, "default", read_options or {} ).head(n) - def read_vector_db(self, feature_group: "hsfs.feature_group.FeatureGroup", n: int =None, dataframe_type: str="default") -> Union[pd.DataFrame, pl.DataFrame, np.ndarray, List[List[Any]]]: + def read_vector_db( + self, + feature_group: "hsfs.feature_group.FeatureGroup", + n: int = None, + dataframe_type: str = "default", + ) -> Union[pd.DataFrame, pl.DataFrame, np.ndarray, List[List[Any]]]: dataframe_type = dataframe_type.lower() self._validate_dataframe_type(dataframe_type) results = VectorDbClient.read_feature_group(feature_group, n) feature_names = [f.name for f in feature_group.features] if dataframe_type == "polars": + if not HAS_POLARS: + raise ModuleNotFoundError(polars_not_installed_message) df = pl.DataFrame(results, schema=feature_names) else: df = pd.DataFrame(results, columns=feature_names, index=None) @@ -576,7 +605,9 @@ def profile( exact_uniqueness: bool = True, ) -> str: # TODO: add statistics for correlations, histograms and exact_uniqueness - if isinstance(df, pl.DataFrame) or isinstance(df, pl.dataframe.frame.DataFrame): + if HAS_POLARS and ( + isinstance(df, pl.DataFrame) or isinstance(df, pl.dataframe.frame.DataFrame) + ): arrow_schema = df.to_arrow().schema else: arrow_schema = pa.Schema.from_pandas(df, preserve_index=False) @@ -589,8 +620,9 @@ def profile( or pa.types.is_large_list(field.type) or pa.types.is_struct(field.type) ) and PYARROW_HOPSWORKS_DTYPE_MAPPING[field.type] in ["timestamp", "date"]: - if isinstance(df, pl.DataFrame) or isinstance( - df, pl.dataframe.frame.DataFrame + if HAS_POLARS and ( + isinstance(df, pl.DataFrame) + or isinstance(df, pl.dataframe.frame.DataFrame) ): df = 
df.with_columns(pl.col(field.name).cast(pl.String)) else: @@ -609,8 +641,9 @@ def profile( stats[col] = df[col].describe().to_dict() final_stats = [] for col in relevant_columns: - if isinstance(df, pl.DataFrame) or isinstance( - df, pl.dataframe.frame.DataFrame + if HAS_POLARS and ( + isinstance(df, pl.DataFrame) + or isinstance(df, pl.dataframe.frame.DataFrame) ): stats[col] = dict(zip(stats["statistic"], stats[col])) # set data type @@ -694,8 +727,9 @@ def validate_with_great_expectations( ) -> ge.core.ExpectationSuiteValidationResult: # This conversion might cause a bottleneck in performance when using polars with greater expectations. # This patch is done becuase currently great_expecatations does not support polars, would need to be made proper when support added. - if isinstance(dataframe, pl.DataFrame) or isinstance( - dataframe, pl.dataframe.frame.DataFrame + if HAS_POLARS and ( + isinstance(dataframe, pl.DataFrame) + or isinstance(dataframe, pl.dataframe.frame.DataFrame) ): warnings.warn( "Currently Great Expectations does not support Polars dataframes. This operation will convert to Pandas dataframe that can be slow.", @@ -716,10 +750,12 @@ def set_job_group(self, group_id: str, description: Optional[str]) -> None: def convert_to_default_dataframe( self, dataframe: Union[pd.DataFrame, pl.DataFrame, pl.dataframe.frame.DataFrame] ) -> Optional[pd.DataFrame]: - if ( - isinstance(dataframe, pd.DataFrame) - or isinstance(dataframe, pl.DataFrame) - or isinstance(dataframe, pl.dataframe.frame.DataFrame) + if isinstance(dataframe, pd.DataFrame) or ( + HAS_POLARS + and ( + isinstance(dataframe, pl.DataFrame) + or isinstance(dataframe, pl.dataframe.frame.DataFrame) + ) ): upper_case_features = [ col for col in dataframe.columns if any(re.finditer("[A-Z]", col)) @@ -762,7 +798,7 @@ def convert_to_default_dataframe( dataframe_copy[col].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype ): dataframe_copy[col] = dataframe_copy[col].dt.tz_convert(None) - elif isinstance(dataframe_copy[col].dtype, pl.Datetime): + elif HAS_POLARS and isinstance(dataframe_copy[col].dtype, pl.Datetime): dataframe_copy = dataframe_copy.with_columns( pl.col(col).dt.replace_time_zone(None) ) @@ -782,8 +818,10 @@ def parse_schema_feature_group( ) -> List[feature.Feature]: if isinstance(dataframe, pd.DataFrame): arrow_schema = pa.Schema.from_pandas(dataframe, preserve_index=False) - elif isinstance(dataframe, pl.DataFrame) or isinstance( - dataframe, pl.dataframe.frame.DataFrame + elif ( + HAS_POLARS + and isinstance(dataframe, pl.DataFrame) + or isinstance(dataframe, pl.dataframe.frame.DataFrame) ): arrow_schema = dataframe.to_arrow().schema features = [] @@ -1005,13 +1043,16 @@ def _random_split( groups += [i] * int(df_size * split.percentage) groups += [len(splits) - 1] * (df_size - len(groups)) random.shuffle(groups) - if isinstance(df, pl.DataFrame) or isinstance(df, pl.dataframe.frame.DataFrame): + if HAS_POLARS and ( + isinstance(df, pl.DataFrame) or isinstance(df, pl.dataframe.frame.DataFrame) + ): df = df.with_columns(pl.Series(name=split_column, values=groups)) else: df[split_column] = groups for i, split in enumerate(splits): - if isinstance(df, pl.DataFrame) or isinstance( - df, pl.dataframe.frame.DataFrame + if HAS_POLARS and ( + isinstance(df, pl.DataFrame) + or isinstance(df, pl.dataframe.frame.DataFrame) ): split_df = df.filter(pl.col(split_column) == i).drop(split_column) else: @@ -1237,8 +1278,9 @@ def _apply_transformation_function( feature_name, transformation_fn, ) in transformation_functions.items(): - 
if isinstance(dataset, pl.DataFrame) or isinstance( - dataset, pl.dataframe.frame.DataFrame + if HAS_POLARS and ( + isinstance(dataset, pl.DataFrame) + or isinstance(dataset, pl.dataframe.frame.DataFrame) ): dataset = dataset.with_columns( pl.col(feature_name).map_elements( @@ -1251,8 +1293,11 @@ def _apply_transformation_function( ) # The below functions is not required for Polars since polars does have object types like pandas if not ( - isinstance(dataset, pl.DataFrame) - or isinstance(dataset, pl.dataframe.frame.DataFrame) + HAS_POLARS + and ( + isinstance(dataset, pl.DataFrame) + or isinstance(dataset, pl.dataframe.frame.DataFrame) + ) ): offline_type = Engine.convert_spark_type_to_offline_type( transformation_fn.output_type @@ -1439,8 +1484,11 @@ def acked(err: Exception, msg: Any) -> None: elif not isinstance( feature_group, ExternalFeatureGroup ) and self._start_offline_materialization(offline_write_options): - if (not offline_write_options.get("skip_offsets", False) - and self._job_api.last_execution(feature_group.materialization_job)): # always skip offsets if executing job for the first time + if not offline_write_options.get( + "skip_offsets", False + ) and self._job_api.last_execution( + feature_group.materialization_job + ): # always skip offsets if executing job for the first time # don't provide the current offsets (read from where the job last left off) initial_check_point = "" # provide the initial_check_point as it will reduce the read amplification of materialization job @@ -1638,12 +1686,12 @@ def _cast_column_to_offline_type( offline_type = offline_type.lower() if offline_type == "timestamp": # convert (if tz!=UTC) to utc, then make timezone unaware - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): return feature_column.cast(pl.Datetime(time_zone=None)) else: return pd.to_datetime(feature_column, utc=True).dt.tz_localize(None) elif offline_type == "date": - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): return feature_column.cast(pl.Date) else: return pd.to_datetime(feature_column, utc=True).dt.date @@ -1652,7 +1700,7 @@ def _cast_column_to_offline_type( or offline_type.startswith("struct<") or offline_type == "boolean" ): - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): return feature_column.map_elements( lambda x: (ast.literal_eval(x) if isinstance(x, str) else x) if (x is not None and x != "") @@ -1665,14 +1713,14 @@ def _cast_column_to_offline_type( else None ) elif offline_type == "string": - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): return feature_column.map_elements( lambda x: str(x) if x is not None else None ) else: return feature_column.apply(lambda x: str(x) if x is not None else None) elif offline_type.startswith("decimal"): - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): return feature_column.map_elements( lambda x: decimal.Decimal(x) if (x is not None) else None ) @@ -1681,7 +1729,7 @@ def _cast_column_to_offline_type( lambda x: decimal.Decimal(x) if (x is not None) else None ) else: - if isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): offline_dtype_mapping = { "bigint": pl.Int64, "int": pl.Int32, @@ -1700,7 +1748,7 @@ def _cast_column_to_offline_type( "double": np.dtype("float64"), } if offline_type in offline_dtype_mapping: - if 
isinstance(feature_column, pl.Series): + if HAS_POLARS and isinstance(feature_column, pl.Series): casted_feature = feature_column.cast( offline_dtype_mapping[offline_type] ) diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py index de5577417c..116d0f5e23 100644 --- a/python/hsfs/feature_group.py +++ b/python/hsfs/feature_group.py @@ -21,7 +21,7 @@ import time import warnings from datetime import date, datetime -from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeVar, Union import avro.schema import confluent_kafka @@ -29,7 +29,6 @@ import humps import numpy as np import pandas as pd -import polars as pl from hsfs import ( engine, feature, @@ -78,6 +77,9 @@ from hsfs.validation_report import ValidationReport +if TYPE_CHECKING: + import polars as pl + _logger = logging.getLogger(__name__) @@ -543,8 +545,13 @@ def get_storage_connector(self): """ storage_connector_provenance = self.get_storage_connector_provenance() - if storage_connector_provenance.inaccessible or storage_connector_provenance.deleted: - _logger.info("The parent storage connector is deleted or inaccessible. For more details access `get_storage_connector_provenance`") + if ( + storage_connector_provenance.inaccessible + or storage_connector_provenance.deleted + ): + _logger.info( + "The parent storage connector is deleted or inaccessible. For more details access `get_storage_connector_provenance`" + ) if storage_connector_provenance.accessible: return storage_connector_provenance.accessible[0] diff --git a/python/hsfs/feature_store.py b/python/hsfs/feature_store.py index c8a18dc6c0..73de870f7f 100644 --- a/python/hsfs/feature_store.py +++ b/python/hsfs/feature_store.py @@ -18,14 +18,13 @@ import datetime import warnings -from typing import Any, Dict, List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar, Union import great_expectations as ge import humps import numpy import numpy as np import pandas as pd -import polars as pl from hsfs import ( expectation_suite, feature, @@ -53,6 +52,10 @@ from hsfs.transformation_function import TransformationFunction +if TYPE_CHECKING: + import polars as pl + + @typechecked class FeatureStore: DEFAULT_VERSION = 1 diff --git a/python/hsfs/feature_view.py b/python/hsfs/feature_view.py index d4ea5fc51d..cea53fb121 100644 --- a/python/hsfs/feature_view.py +++ b/python/hsfs/feature_view.py @@ -20,12 +20,22 @@ import logging import warnings from datetime import date, datetime -from typing import Any, Dict, List, Literal, Optional, Set, Tuple, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Literal, + Optional, + Set, + Tuple, + TypeVar, + Union, +) import humps import numpy as np import pandas as pd -import polars as pl from hsfs import ( feature_group, storage_connector, @@ -61,25 +71,29 @@ from hsfs.training_dataset_split import TrainingDatasetSplit -_logger = logging.getLogger(__name__) +if TYPE_CHECKING: + import polars as pl + + TrainingDatasetDataFrameTypes = Union[ + pd.DataFrame, + TypeVar("pyspark.sql.DataFrame"), # noqa: F821 + TypeVar("pyspark.RDD"), # noqa: F821 + np.ndarray, + List[List[Any]], + pl.DataFrame, + ] -TrainingDatasetDataFrameTypes = Union[ - pd.DataFrame, - TypeVar("pyspark.sql.DataFrame"), # noqa: F821 - TypeVar("pyspark.RDD"), # noqa: F821 - np.ndarray, - List[List[Any]], - pl.DataFrame, -] - -SplineDataFrameTypes = Union[ - pd.DataFrame, - TypeVar("pyspark.sql.DataFrame"), # noqa: 
F821 - TypeVar("pyspark.RDD"), # noqa: F821 - np.ndarray, - List[List[Any]], - TypeVar("SplineGroup"), # noqa: F821 -] + SplineDataFrameTypes = Union[ + pd.DataFrame, + TypeVar("pyspark.sql.DataFrame"), # noqa: F821 + TypeVar("pyspark.RDD"), # noqa: F821 + np.ndarray, + List[List[Any]], + TypeVar("SplineGroup"), # noqa: F821 + ] + + +_logger = logging.getLogger(__name__) @typechecked diff --git a/python/hsfs/storage_connector.py b/python/hsfs/storage_connector.py index 96596a5b0f..c37cfdd289 100644 --- a/python/hsfs/storage_connector.py +++ b/python/hsfs/storage_connector.py @@ -21,16 +21,18 @@ import re import warnings from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar, Union import humps import numpy as np import pandas as pd -import polars as pl from hsfs import client, engine from hsfs.core import storage_connector_api +if TYPE_CHECKING: + import polars as pl + _logger = logging.getLogger(__name__) @@ -211,7 +213,9 @@ def get_feature_groups(self): feature_groups_provenance = self.get_feature_groups_provenance() if feature_groups_provenance.inaccessible or feature_groups_provenance.deleted: - _logger.info("There are deleted or inaccessible feature groups. For more details access `get_feature_groups_provenance`") + _logger.info( + "There are deleted or inaccessible feature groups. For more details access `get_feature_groups_provenance`" + ) if feature_groups_provenance.accessible: return feature_groups_provenance.accessible diff --git a/python/pyproject.toml b/python/pyproject.toml index 0ba86dfc4a..f1bc1f908f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -46,7 +46,6 @@ dependencies = [ "fsspec", "retrying", "hopsworks_aiomysql[sa]==0.2.1", - "polars>=0.20.18,<=0.21.0", "opensearch-py>=1.1.0,<=2.4.2", ] @@ -81,6 +80,7 @@ python = [ "fastavro>=1.4.11,<=1.8.4", "tqdm", ] +polars=["polars>=0.20.18,<=0.21.0"] [build-system] requires = ["setuptools", "wheel"] diff --git a/python/tests/engine/test_python.py b/python/tests/engine/test_python.py index 08bc8d52a7..c885f4361d 100644 --- a/python/tests/engine/test_python.py +++ b/python/tests/engine/test_python.py @@ -18,7 +18,6 @@ import numpy as np import pandas as pd -import polars as pl import pyarrow as pa import pytest from confluent_kafka.admin import PartitionMetadata, TopicMetadata @@ -35,9 +34,14 @@ from hsfs.constructor import query from hsfs.constructor.hudi_feature_group_alias import HudiFeatureGroupAlias from hsfs.core import inode, job +from hsfs.core.constants import HAS_POLARS from hsfs.engine import python from hsfs.training_dataset_feature import TrainingDatasetFeature -from polars.testing import assert_frame_equal as polars_assert_frame_equal + + +if HAS_POLARS: + import polars as pl + from polars.testing import assert_frame_equal as polars_assert_frame_equal class TestPython: @@ -278,6 +282,10 @@ def test_read_hopsfs_connector_empty_dataframe(self, mocker): assert isinstance(dataframe, pd.DataFrame) assert len(dataframe) == 0 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_read_hopsfs_connector_empty_dataframe_polars(self, mocker): # Arrange @@ -429,6 +437,10 @@ def test_read_pandas_other(self, mocker): assert mock_pandas_read_csv.call_count == 0 assert mock_pandas_read_parquet.call_count == 0 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_read_polars_csv(self, mocker): # Arrange mock_pandas_read_csv = 
mocker.patch("polars.read_csv") @@ -443,6 +455,10 @@ def test_read_polars_csv(self, mocker): assert mock_pandas_read_csv.call_count == 1 assert mock_pandas_read_parquet.call_count == 0 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_read_polars_tsv(self, mocker): # Arrange mock_pandas_read_csv = mocker.patch("polars.read_csv") @@ -457,6 +473,10 @@ def test_read_polars_tsv(self, mocker): assert mock_pandas_read_csv.call_count == 1 assert mock_pandas_read_parquet.call_count == 0 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_read_polars_parquet(self, mocker): # Arrange mock_pandas_read_csv = mocker.patch("polars.read_csv") @@ -474,6 +494,10 @@ def test_read_polars_parquet(self, mocker): assert mock_pandas_read_csv.call_count == 0 assert mock_pandas_read_parquet.call_count == 1 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_read_polars_other(self, mocker): # Arrange mock_pandas_read_csv = mocker.patch("polars.read_csv") @@ -1013,6 +1037,10 @@ def test_profile_pandas_with_null_column(self, mocker): ) assert mock_python_engine_convert_pandas_statistics.call_count == 3 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_profile_polars(self, mocker): # Arrange mock_python_engine_convert_pandas_statistics = mocker.patch( @@ -1051,6 +1079,10 @@ def test_profile_polars(self, mocker): ) assert mock_python_engine_convert_pandas_statistics.call_count == 3 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_profile_polars_with_null_column(self, mocker): # Arrange mock_python_engine_convert_pandas_statistics = mocker.patch( @@ -1365,6 +1397,10 @@ def test_convert_to_default_dataframe_pandas_with_spaces(self, mocker): "Feature names are sanitized to use underscore '_' in the feature store." 
) + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_convert_to_default_dataframe_polars(self, mocker): # Arrange mock_warnings = mocker.patch("warnings.warn") @@ -1431,6 +1467,10 @@ def test_parse_schema_feature_group_pandas(self, mocker): assert result[0].name == "col1" assert result[1].name == "col2" + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_parse_schema_feature_group_polars(self, mocker): # Arrange mocker.patch("hsfs.engine.python.Engine._convert_pandas_dtype_to_offline_type") @@ -2317,6 +2357,10 @@ def test_split_labels_dataframe_type_pandas(self): assert isinstance(result_df, pd.DataFrame) assert result_df_split is None + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_split_labels_dataframe_type_polars(self): # Arrange python_engine = python.Engine() @@ -2412,6 +2456,10 @@ def test_split_labels_labels_dataframe_type_pandas(self): assert isinstance(result_df, pd.DataFrame) assert isinstance(result_df_split, pd.Series) + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_split_labels_labels_dataframe_type_polars(self): # Arrange python_engine = python.Engine() @@ -3093,6 +3141,10 @@ def test_return_dataframe_type_pandas(self): # Assert assert str(result) == " col1 col2\n0 1 3\n1 2 4" + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_return_dataframe_type_polars(self): # Arrange python_engine = python.Engine() @@ -3269,6 +3321,10 @@ def plus_one(a): assert result["tf_name"][0] == 2 assert result["tf_name"][1] == 3 + @pytest.mark.skipif( + not HAS_POLARS, + reason="Polars is not installed.", + ) def test_apply_transformation_function_polars(self, mocker): # Arrange mocker.patch("hsfs.client.get_instance") @@ -3763,7 +3819,7 @@ def test_materialization_kafka_first_job_execution(self, mocker): args="defaults tests_offsets", await_termination=False, ) - + def test_materialization_kafka_skip_offsets(self, mocker): # Arrange mocker.patch("hsfs.engine.python.Engine._get_kafka_config", return_value={}) @@ -3805,7 +3861,10 @@ def test_materialization_kafka_skip_offsets(self, mocker): python_engine._write_dataframe_kafka( feature_group=fg, dataframe=df, - offline_write_options={"start_offline_materialization": True, "skip_offsets": True}, + offline_write_options={ + "start_offline_materialization": True, + "skip_offsets": True, + }, ) # Assert From fb4f5c575665ecb612df8cb423c1843d92cd32fb Mon Sep 17 00:00:00 2001 From: manu-sj Date: Thu, 5 Sep 2024 19:44:28 +0200 Subject: [PATCH 2/8] removing TYPE_CHECKING variable to allow mkdocs to build --- python/hsfs/core/vector_server.py | 3 +-- python/hsfs/engine/python.py | 3 +-- python/hsfs/feature_group.py | 5 +++-- python/hsfs/feature_store.py | 5 +++-- python/hsfs/feature_view.py | 4 ++-- python/hsfs/storage_connector.py | 5 +++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index 1f27406650..a872bba8c9 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -21,7 +21,6 @@ from datetime import datetime, timezone from io import BytesIO from typing import ( - TYPE_CHECKING, Any, Callable, Dict, @@ -70,7 +69,7 @@ except ImportError: from avro.io import BinaryDecoder -if HAS_POLARS or TYPE_CHECKING: +if HAS_POLARS or HAS_POLARS: import polars as pl _logger = logging.getLogger(__name__) diff --git a/python/hsfs/engine/python.py 
b/python/hsfs/engine/python.py index c9e3f3fa0d..ce1f7a393e 100644 --- a/python/hsfs/engine/python.py +++ b/python/hsfs/engine/python.py @@ -31,7 +31,6 @@ from io import BytesIO from pathlib import Path from typing import ( - TYPE_CHECKING, Any, Callable, Dict, @@ -94,7 +93,7 @@ # Disable pyhive INFO logging logging.getLogger("pyhive").setLevel(logging.WARNING) -if HAS_POLARS or TYPE_CHECKING: +if HAS_POLARS: import polars as pl HAS_FAST = False diff --git a/python/hsfs/feature_group.py b/python/hsfs/feature_group.py index 116d0f5e23..00c266eb10 100644 --- a/python/hsfs/feature_group.py +++ b/python/hsfs/feature_group.py @@ -21,7 +21,7 @@ import time import warnings from datetime import date, datetime -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, TypeVar, Union +from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union import avro.schema import confluent_kafka @@ -65,6 +65,7 @@ ) from hsfs.core import feature_monitoring_config as fmc from hsfs.core import feature_monitoring_result as fmr +from hsfs.core.constants import HAS_POLARS from hsfs.core.job import Job from hsfs.core.variable_api import VariableApi from hsfs.core.vector_db_client import VectorDbClient @@ -77,7 +78,7 @@ from hsfs.validation_report import ValidationReport -if TYPE_CHECKING: +if HAS_POLARS: import polars as pl _logger = logging.getLogger(__name__) diff --git a/python/hsfs/feature_store.py b/python/hsfs/feature_store.py index 73de870f7f..d829b93f6d 100644 --- a/python/hsfs/feature_store.py +++ b/python/hsfs/feature_store.py @@ -18,7 +18,7 @@ import datetime import warnings -from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar, Union +from typing import Any, Dict, List, Optional, TypeVar, Union import great_expectations as ge import humps @@ -46,13 +46,14 @@ training_dataset_api, transformation_function_engine, ) +from hsfs.core.constants import HAS_POLARS from hsfs.decorators import typechecked from hsfs.embedding import EmbeddingIndex from hsfs.statistics_config import StatisticsConfig from hsfs.transformation_function import TransformationFunction -if TYPE_CHECKING: +if HAS_POLARS: import polars as pl diff --git a/python/hsfs/feature_view.py b/python/hsfs/feature_view.py index cea53fb121..32cd3b556b 100644 --- a/python/hsfs/feature_view.py +++ b/python/hsfs/feature_view.py @@ -21,7 +21,6 @@ import warnings from datetime import date, datetime from typing import ( - TYPE_CHECKING, Any, Dict, List, @@ -62,6 +61,7 @@ ) from hsfs.core import feature_monitoring_config as fmc from hsfs.core import feature_monitoring_result as fmr +from hsfs.core.constants import HAS_POLARS from hsfs.core.feature_view_api import FeatureViewApi from hsfs.core.vector_db_client import VectorDbClient from hsfs.decorators import typechecked @@ -71,7 +71,7 @@ from hsfs.training_dataset_split import TrainingDatasetSplit -if TYPE_CHECKING: +if HAS_POLARS: import polars as pl TrainingDatasetDataFrameTypes = Union[ diff --git a/python/hsfs/storage_connector.py b/python/hsfs/storage_connector.py index c37cfdd289..cd5c551149 100644 --- a/python/hsfs/storage_connector.py +++ b/python/hsfs/storage_connector.py @@ -21,16 +21,17 @@ import re import warnings from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypeVar, Union +from typing import Any, Dict, List, Optional, TypeVar, Union import humps import numpy as np import pandas as pd from hsfs import client, engine from hsfs.core import storage_connector_api +from hsfs.core.constants import HAS_POLARS -if 
TYPE_CHECKING: +if HAS_POLARS: import polars as pl _logger = logging.getLogger(__name__) From 998193e19e3231c9d54ea436cccb9cc30456e50d Mon Sep 17 00:00:00 2001 From: manu-sj Date: Thu, 5 Sep 2024 19:49:17 +0200 Subject: [PATCH 3/8] changing not HAS_POLARS --- python/hsfs/core/arrow_flight_client.py | 5 ++--- python/hsfs/core/vector_server.py | 16 +++++++--------- python/hsfs/engine/python.py | 22 +++++++++++----------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/python/hsfs/core/arrow_flight_client.py b/python/hsfs/core/arrow_flight_client.py index cd5f61844c..83679af059 100644 --- a/python/hsfs/core/arrow_flight_client.py +++ b/python/hsfs/core/arrow_flight_client.py @@ -403,10 +403,9 @@ def _get_dataset(self, descriptor, timeout=None, dataframe_type="pandas"): reader = self._connection.do_get(info.endpoints[0].ticket, options) _logger.debug("Dataset fetched. Converting to dataframe %s.", dataframe_type) if dataframe_type.lower() == "polars": - if HAS_POLARS: - return pl.from_arrow(reader.read_all()) - else: + if not HAS_POLARS: raise ModuleNotFoundError(polars_not_installed_message) + return pl.from_arrow(reader.read_all()) else: return reader.read_pandas() diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index a872bba8c9..f32db73920 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -500,16 +500,14 @@ def handle_feature_vector_return_type( return pandas_df elif return_type.lower() == "polars": _logger.debug("Returning feature vector as polars dataframe") - if HAS_POLARS: - return pl.DataFrame( - feature_vectorz if batch else [feature_vectorz], - schema=self._feature_vector_col_name - if not inference_helper - else None, - orient="row", - ) - else: + if not HAS_POLARS: raise ModuleNotFoundError(polars_not_installed_message) + + return pl.DataFrame( + feature_vectorz if batch else [feature_vectorz], + schema=self._feature_vector_col_name if not inference_helper else None, + orient="row", + ) else: raise ValueError( f"""Unknown return type. 
Supported return types are {"'list', 'numpy'" if not inference_helper else "'dict'"}, 'polars' and 'pandas''""" diff --git a/python/hsfs/engine/python.py b/python/hsfs/engine/python.py index ce1f7a393e..3132f343c9 100644 --- a/python/hsfs/engine/python.py +++ b/python/hsfs/engine/python.py @@ -235,15 +235,15 @@ def _sql_offline( with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) if dataframe_type.lower() == "polars": - if HAS_POLARS: - result_df = util.run_with_loading_animation( - "Reading data from Hopsworks, using Hive", - pl.read_database, - sql_query, - hive_conn, - ) - else: + if not HAS_POLARS: raise ModuleNotFoundError(polars_not_installed_message) + + result_df = util.run_with_loading_animation( + "Reading data from Hopsworks, using Hive", + pl.read_database, + sql_query, + hive_conn, + ) else: result_df = util.run_with_loading_animation( "Reading data from Hopsworks, using Hive", @@ -278,10 +278,10 @@ def _jdbc( if "sqlalchemy" in str(type(mysql_conn)): sql_query = sql.text(sql_query) if dataframe_type.lower() == "polars": - if HAS_POLARS: - result_df = pl.read_database(sql_query, mysql_conn) - else: + if not HAS_POLARS: raise ModuleNotFoundError(polars_not_installed_message) + + result_df = pl.read_database(sql_query, mysql_conn) else: result_df = pd.read_sql(sql_query, mysql_conn) if schema: From 3b91c52b8ad1eeb98038061ad14b03018af0a40b Mon Sep 17 00:00:00 2001 From: manu-sj Date: Thu, 5 Sep 2024 20:22:48 +0200 Subject: [PATCH 4/8] adding polars to requirement docs and fixing typo --- python/hsfs/core/vector_server.py | 2 +- requirements-docs.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index f32db73920..21106e9344 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -69,7 +69,7 @@ except ImportError: from avro.io import BinaryDecoder -if HAS_POLARS or HAS_POLARS: +if HAS_POLARS: import polars as pl _logger = logging.getLogger(__name__) diff --git a/requirements-docs.txt b/requirements-docs.txt index d1499a2625..0b7284da18 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -9,3 +9,4 @@ markdown==3.6 pymdown-extensions==10.7.1 mkdocs-macros-plugin==1.0.4 mkdocs-minify-plugin>=0.2.0 +polars==0.20.31 \ No newline at end of file From 8f8dbe7e7ff1d81f4e6f16218a96e7758a566773 Mon Sep 17 00:00:00 2001 From: manu-sj Date: Fri, 6 Sep 2024 16:34:42 +0200 Subject: [PATCH 5/8] bumping up hsfs version --- python/hsfs/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/hsfs/version.py b/python/hsfs/version.py index bd2c9c4f99..0a80f3f6ba 100644 --- a/python/hsfs/version.py +++ b/python/hsfs/version.py @@ -14,4 +14,4 @@ # limitations under the License. 
# -__version__ = "3.8.0rc0" +__version__ = "3.8.0rc1" From 71ad4dc9e1b88e517b2f362b2240ef231efd9d15 Mon Sep 17 00:00:00 2001 From: manu-sj Date: Fri, 6 Sep 2024 16:39:16 +0200 Subject: [PATCH 6/8] bumping up hsfs java version --- java/beam/pom.xml | 2 +- java/flink/pom.xml | 2 +- java/hsfs/pom.xml | 2 +- java/pom.xml | 2 +- java/spark/pom.xml | 2 +- utils/java/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/java/beam/pom.xml b/java/beam/pom.xml index 342cae22b2..0e53805dd9 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC0 + “3.8.1-RC1” 4.0.0 diff --git a/java/flink/pom.xml b/java/flink/pom.xml index 9b0805db8e..50495f9b43 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC0 + “3.8.1-RC1” 4.0.0 diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index d245d10d3d..41ea927929 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC0 + “3.8.1-RC1” 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 1baf33cf4e..a2ad16c980 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - 3.8.0-RC0 + “3.8.1-RC1” hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index 5d7cba068a..4d99e405f1 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - 3.8.0-RC0 + “3.8.1-RC1” 4.0.0 diff --git a/utils/java/pom.xml b/utils/java/pom.xml index d45916ee87..bd3d0083dd 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - 3.8.0-RC0 + “3.8.1-RC1” 3.2.0.0-SNAPSHOT From 535d228d0c4f90b9ecddc5287fa01708b3d235bf Mon Sep 17 00:00:00 2001 From: manu-sj Date: Fri, 6 Sep 2024 16:41:14 +0200 Subject: [PATCH 7/8] bumping up hsfs java version --- java/beam/pom.xml | 2 +- java/flink/pom.xml | 2 +- java/hsfs/pom.xml | 2 +- java/pom.xml | 2 +- java/spark/pom.xml | 2 +- utils/java/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/java/beam/pom.xml b/java/beam/pom.xml index 0e53805dd9..25ca2c56d4 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - “3.8.1-RC1” + 3.8.1-RC1 4.0.0 diff --git a/java/flink/pom.xml b/java/flink/pom.xml index 50495f9b43..eba36de386 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - “3.8.1-RC1” + 3.8.1-RC1 4.0.0 diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index 41ea927929..9eedae6b32 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - “3.8.1-RC1” + 3.8.1-RC1 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index a2ad16c980..0a2d1b173d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - “3.8.1-RC1” + 3.8.1-RC1 hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index 4d99e405f1..cab7b9960c 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - “3.8.1-RC1” + 3.8.1-RC1 4.0.0 diff --git a/utils/java/pom.xml b/utils/java/pom.xml index bd3d0083dd..d7576ce3df 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - “3.8.1-RC1” + 3.8.1-RC1 3.2.0.0-SNAPSHOT From 74306f5eb8601ffa0ddf4264c7cde88d5d1f090a Mon Sep 17 00:00:00 2001 From: manu-sj Date: Fri, 6 Sep 2024 16:42:26 +0200 Subject: [PATCH 8/8] bumping up hsfs java version 
--- java/beam/pom.xml | 2 +- java/flink/pom.xml | 2 +- java/hsfs/pom.xml | 2 +- java/pom.xml | 2 +- java/spark/pom.xml | 2 +- utils/java/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/java/beam/pom.xml b/java/beam/pom.xml index 25ca2c56d4..aea546d239 100644 --- a/java/beam/pom.xml +++ b/java/beam/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.1-RC1 + 3.8.0-RC1 4.0.0 diff --git a/java/flink/pom.xml b/java/flink/pom.xml index eba36de386..e51122de22 100644 --- a/java/flink/pom.xml +++ b/java/flink/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.1-RC1 + 3.8.0-RC1 4.0.0 diff --git a/java/hsfs/pom.xml b/java/hsfs/pom.xml index 9eedae6b32..c8e99e2a6c 100644 --- a/java/hsfs/pom.xml +++ b/java/hsfs/pom.xml @@ -5,7 +5,7 @@ hsfs-parent com.logicalclocks - 3.8.1-RC1 + 3.8.0-RC1 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 0a2d1b173d..0cc46465f8 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -7,7 +7,7 @@ com.logicalclocks hsfs-parent pom - 3.8.1-RC1 + 3.8.0-RC1 hsfs spark diff --git a/java/spark/pom.xml b/java/spark/pom.xml index cab7b9960c..4c4dea00e1 100644 --- a/java/spark/pom.xml +++ b/java/spark/pom.xml @@ -22,7 +22,7 @@ hsfs-parent com.logicalclocks - 3.8.1-RC1 + 3.8.0-RC1 4.0.0 diff --git a/utils/java/pom.xml b/utils/java/pom.xml index d7576ce3df..6366083f53 100644 --- a/utils/java/pom.xml +++ b/utils/java/pom.xml @@ -5,7 +5,7 @@ com.logicalclocks hsfs-utils - 3.8.1-RC1 + 3.8.0-RC1 3.2.0.0-SNAPSHOT
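
Notes on the optional-dependency pattern this series introduces:

PATCH 1/8 gates every polars code path behind `HAS_POLARS`, which the new `python/hsfs/core/constants.py` computes with `importlib.util.find_spec("polars")`. `find_spec` only probes the import machinery, so the flag is cheap to evaluate and never imports polars itself; the actual import happens once, behind `if HAS_POLARS:`, and each guarded call site raises `ModuleNotFoundError(polars_not_installed_message)` otherwise. A minimal self-contained sketch of the same pattern — the module name `optional_polars_sketch` and the function `rows_to_polars` are illustrative, not part of the patch:

# optional_polars_sketch.py -- illustrative sketch of the HAS_POLARS guard
import importlib.util
from typing import Any, List

# True iff polars is importable; find_spec does not import the package.
HAS_POLARS: bool = importlib.util.find_spec("polars") is not None

polars_not_installed_message = (
    "Polars package not found. Install the extra, e.g. "
    '`pip install "hopsworks[polars]"`, or install polars directly.'
)

if HAS_POLARS:
    import polars as pl  # imported once, only when actually available


def rows_to_polars(rows: List[List[Any]], column_names: List[str]) -> "pl.DataFrame":
    # Guard the polars code path and fail with an actionable message,
    # mirroring the `raise ModuleNotFoundError(...)` branches in the patch.
    if not HAS_POLARS:
        raise ModuleNotFoundError(polars_not_installed_message)
    return pl.DataFrame(rows, schema=column_names, orient="row")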
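
The test changes in PATCH 1/8 mark every polars test with `pytest.mark.skipif(not HAS_POLARS, ...)`, so a polars-free environment degrades to skipped tests instead of collection errors. A sketch of that pattern against the module above (test names are illustrative):

# test_optional_polars_sketch.py -- illustrative; same skipif pattern as the patch
import pytest

from optional_polars_sketch import HAS_POLARS, rows_to_polars


@pytest.mark.skipif(not HAS_POLARS, reason="Polars is not installed.")
def test_rows_to_polars_roundtrip():
    import polars as pl  # safe here: the test only runs when polars is present

    df = rows_to_polars([[1, "a"], [2, "b"]], ["id", "label"])
    assert isinstance(df, pl.DataFrame)
    assert df.shape == (2, 2)
    assert df["id"].to_list() == [1, 2]


def test_rows_to_polars_raises_without_polars():
    if HAS_POLARS:
        pytest.skip("polars is installed; the error branch is unreachable here")
    with pytest.raises(ModuleNotFoundError):
        rows_to_polars([[1, "a"]], ["id", "label"])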
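
PATCH 2/8 then swaps the `if TYPE_CHECKING:` imports for `if HAS_POLARS:`. Provided a module enables postponed annotation evaluation (`from __future__ import annotations`, PEP 563), either guard lets it import without polars, since signatures like `pl.DataFrame` stay unevaluated at definition time; the difference is that `TYPE_CHECKING` is always false at runtime, so tools that resolve annotations while the program runs — the commit message cites the mkdocs build — can never find `pl`, whereas `HAS_POLARS` binds it whenever polars is installed. A small sketch of the distinction (the module layout is illustrative, and this reading of the mkdocs failure is inferred from the commit message):

# annotations_sketch.py -- why HAS_POLARS works where TYPE_CHECKING does not
from __future__ import annotations  # PEP 563: annotations stay unevaluated strings

import importlib.util

HAS_POLARS = importlib.util.find_spec("polars") is not None

if HAS_POLARS:  # unlike TYPE_CHECKING, this binds `pl` at runtime when possible
    import polars as pl


def to_frame(rows) -> pl.DataFrame:
    # Imports cleanly without polars; doc generators that resolve annotations
    # at runtime still find `pl` whenever the polars extra is installed.
    raise NotImplementedError("illustrative stub")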