From ef383bce465ce9adf6457724865a3d81387d536c Mon Sep 17 00:00:00 2001 From: manu-sj <152865565+manu-sj@users.noreply.github.com> Date: Wed, 20 Nov 2024 15:57:39 +0530 Subject: [PATCH] =?UTF-8?q?[FSTORE-1606]=20Allow=20=C2=B4entries=C2=B4=20t?= =?UTF-8?q?o=20be=20None=20while=20retrieving=20feature=20vectors=20from?= =?UTF-8?q?=20a=20feature=20view=20with=20only=20on-demand=20features=20(#?= =?UTF-8?q?405)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * allow entries to be None when feature view contains only on-demand features * addressing review comments --- python/hsfs/core/vector_server.py | 38 +++++++++++++++++++++++++++++-- python/hsfs/feature_view.py | 4 ++-- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/python/hsfs/core/vector_server.py b/python/hsfs/core/vector_server.py index 277b25051..d354a5400 100755 --- a/python/hsfs/core/vector_server.py +++ b/python/hsfs/core/vector_server.py @@ -149,6 +149,7 @@ def __init__( self._feature_to_handle_if_sql: Optional[Set[str]] = None self._valid_serving_keys: Set[str] = set() self._serving_initialized: bool = False + self.__all_features_on_demand: Optional[bool] = None def init_serving( self, @@ -415,14 +416,23 @@ def get_feature_vectors( request_parameters is None or len(request_parameters) == 0 or isinstance(request_parameters, dict) + or not entries or len(request_parameters) == len(entries) - ), "Request Parameters should be a Dictionary, None, empty or have the same length as the entries" + ), "Request Parameters should be a Dictionary, None, empty or have the same length as the entries if they are not None or empty."
online_client_choice = self.which_client_and_ensure_initialised( force_rest_client=force_rest_client, force_sql_client=force_sql_client ) rondb_entries = [] skipped_empty_entries = [] + + if not entries: + entries = ( + [[] * len(request_parameters)] + if isinstance(request_parameters, list) + else [[]] + ) + for (idx, entry), passed, vector_features in itertools.zip_longest( enumerate(entries), passed_features, @@ -547,7 +557,11 @@ def assemble_feature_vector( # for backward compatibility, before 3.4, if result is empty, # instead of throwing error, it skips the result # Maybe we drop this behaviour for 4.0 - if len(result_dict) == 0 and not allow_missing: + if ( + len(result_dict) == 0 + and not allow_missing + and not self._all_features_on_demand + ): return None if not allow_missing and len(missing_features) > 0: @@ -1255,6 +1269,17 @@ def validate_entry( Keys relevant to vector_db are filtered out. """ + _logger.debug( + "Checking if entry is None and all features in the feature view are on-demand." + ) + if not entry: + if self._all_features_on_demand: + return {} + else: + raise exceptions.FeatureStoreException( + "The required argument `entries` is missing. If the feature view includes only on-demand features, entries may be left empty or set to None." 
+ ) + _logger.debug("Checking keys in entry are valid serving keys.") for key in entry.keys(): if key not in self.valid_serving_keys: @@ -1584,3 +1609,12 @@ def transformed_feature_vector_col_name(self): ] self._transformed_feature_vector_col_name.extend(output_column_names) return self._transformed_feature_vector_col_name + + @property + def _all_features_on_demand(self) -> bool: + """True if all features in the feature view are on-demand.""" + if self.__all_features_on_demand is None: + self.__all_features_on_demand = all( + feature.on_demand_transformation_function for feature in self._features + ) + return self.__all_features_on_demand diff --git a/python/hsfs/feature_view.py b/python/hsfs/feature_view.py index 5d3151b18..b61b3e09a 100644 --- a/python/hsfs/feature_view.py +++ b/python/hsfs/feature_view.py @@ -520,7 +520,7 @@ def get_batch_query( def get_feature_vector( self, - entry: Dict[str, Any], + entry: Optional[Dict[str, Any]] = None, passed_features: Optional[Dict[str, Any]] = None, external: Optional[bool] = None, return_type: Literal["list", "polars", "numpy", "pandas"] = "list", @@ -635,7 +635,7 @@ def get_feature_vector( def get_feature_vectors( self, - entry: List[Dict[str, Any]], + entry: Optional[List[Dict[str, Any]]] = None, passed_features: Optional[List[Dict[str, Any]]] = None, external: Optional[bool] = None, return_type: Literal["list", "polars", "numpy", "pandas"] = "list",