-
Notifications
You must be signed in to change notification settings - Fork 26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FSTORE-1580] OnlineFS Observability #435
Open
bubriks
wants to merge
18
commits into
logicalclocks:main
Choose a base branch
from
bubriks:FSTORE-1580-new
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
f936bc6
temp
bubriks 72fbbf9
standardize headers
bubriks 9d546d5
Merge branch 'main' into FSTORE-1580-new
bubriks 1a5c1fe
lint
bubriks 946b80c
some test fix
bubriks 6e47085
working on tests
bubriks 2bd22a9
add unit test for get_headers
bubriks f7b4800
ruff fix
bubriks f5b1206
add wait_for_online_ingestion
bubriks b80c0de
small rename
bubriks 1f83a73
add timeout
bubriks 0446791
fix B006
bubriks 51196b6
Merge branch 'main' into FSTORE-1580-new
bubriks 55c37eb
test fix
bubriks 03847e3
feedback fix
bubriks 22162b2
Merge branch 'main' into FSTORE-1580-new
bubriks 6a96b06
fix
bubriks a87a4bb
fix lint
bubriks File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
# | ||
# Copyright 2024 Hopsworks AB | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from __future__ import annotations | ||
|
||
import json | ||
import time | ||
import warnings | ||
from datetime import datetime, timedelta | ||
from typing import ( | ||
Any, | ||
Dict, | ||
List, | ||
Optional, | ||
Union, | ||
) | ||
|
||
import humps | ||
from hopsworks_common import util | ||
from hsfs import feature_group as fg_mod | ||
from hsfs.core import online_ingestion_batch_result | ||
from tqdm.auto import tqdm | ||
|
||
|
||
class OnlineIngestion:
    """
    Metadata object used to provide Online Ingestion information for a feature group.

    Instances are either built locally (typically with only `num_entries` set)
    or parsed from a backend response via `from_response_json`. Progress
    counters that the backend has not reported yet are stored as `None` and
    exposed as `0` through the corresponding properties.
    """

    def __init__(
        self,
        id: Optional[int] = None,
        num_entries: Optional[int] = None,
        current_offsets: Optional[str] = None,
        processed_entries: Optional[int] = None,
        inserted_entries: Optional[int] = None,
        aborted_entries: Optional[int] = None,
        batch_results: Optional[
            Union[
                List[online_ingestion_batch_result.OnlineIngestionBatchResult],
                List[Dict[str, Any]],
            ]
        ] = None,
        feature_group: Optional[fg_mod.FeatureGroup] = None,
        **kwargs,
    ):
        """Create the metadata object.

        # Arguments
            id: Identifier of the online ingestion.
            num_entries: Number of entries expected to be ingested (specified
                when inserting).
            current_offsets: Current offsets, kept as the raw string received.
            processed_entries: Number of entries processed so far.
            inserted_entries: Number of entries inserted so far.
            aborted_entries: Number of entries aborted so far.
            batch_results: Batch results, either already parsed objects or raw
                dictionaries which are parsed with
                `OnlineIngestionBatchResult.from_response_json`.
            feature_group: Feature group this ingestion belongs to.
            **kwargs: Any additional keys are accepted and ignored.
        """
        self._id = id
        self._num_entries = num_entries  # specified when inserting
        self._current_offsets = current_offsets
        self._processed_entries = processed_entries
        self._inserted_entries = inserted_entries
        self._aborted_entries = aborted_entries
        # batch inserts performed by onlinefs
        self._batch_results = (
            [
                (
                    online_ingestion_batch_result.OnlineIngestionBatchResult.from_response_json(
                        batch_result
                    )
                    if isinstance(batch_result, dict)
                    else batch_result
                )
                for batch_result in batch_results
            ]
            if batch_results
            else []
        )
        self._feature_group = feature_group

    @classmethod
    def from_response_json(
        cls,
        json_dict: Optional[Dict[str, Any]],
        feature_group: Optional[fg_mod.FeatureGroup] = None,
    ) -> Optional[Union[OnlineIngestion, List[OnlineIngestion]]]:
        """Build one or several instances from a backend response.

        # Arguments
            json_dict: Response payload (camelCase keys), or `None`.
            feature_group: Feature group attached to every created instance.

        # Returns
            `None` if `json_dict` is `None` or the payload reports a count
            below one; a single instance if the payload has no `count` key or
            a count of exactly one; a list of instances if the count is
            greater than one.
        """
        if json_dict is None:
            return None

        json_decamelized: dict = humps.decamelize(json_dict)

        # A payload without "count" describes a single ingestion directly.
        if "count" not in json_decamelized:
            return cls(**json_decamelized, feature_group=feature_group)
        elif json_decamelized["count"] == 1:
            return cls(**json_decamelized["items"][0], feature_group=feature_group)
        elif json_decamelized["count"] > 1:
            return [
                cls(**item, feature_group=feature_group)
                for item in json_decamelized["items"]
            ]
        else:
            return None

    def refresh(self):
        """Re-fetch this ingestion (filtered by its id) and copy the new state in place."""
        # Imported here rather than at module level; the API module imports
        # this module, so a top-level import would presumably be circular.
        from hsfs.core.online_ingestion_api import OnlineIngestionApi

        online_ingestion = OnlineIngestionApi().get_online_ingestion(
            self.feature_group, query_params={"filter_by": f"ID:{self.id}"}
        )
        # NOTE(review): from_response_json may return None or a list; both
        # would fail on `.__dict__` below. The ID filter is assumed to yield
        # exactly one ingestion - confirm against the backend contract.
        self.__dict__.update(online_ingestion.__dict__)

    def to_dict(self):
        """Return the payload sent to the backend: only the id and the number of entries."""
        return {"id": self._id, "numEntries": self._num_entries}

    def json(self):
        """Serialize this object to a JSON string using `util.Encoder`."""
        return json.dumps(self, cls=util.Encoder)

    @property
    def id(self) -> Optional[int]:
        """Identifier of the online ingestion, if set."""
        return self._id

    @property
    def num_entries(self) -> Optional[int]:
        """Number of entries expected to be ingested, or `None` if unknown."""
        return self._num_entries

    @num_entries.setter
    def num_entries(self, num_entries: int) -> None:
        self._num_entries = num_entries

    @property
    def current_offsets(self) -> Optional[str]:
        """Current offsets as received, if set."""
        return self._current_offsets

    @property
    def processed_entries(self) -> int:
        """Number of processed entries (`0` when not reported)."""
        return 0 if self._processed_entries is None else self._processed_entries

    @property
    def inserted_entries(self) -> int:
        """Number of inserted entries (`0` when not reported)."""
        return 0 if self._inserted_entries is None else self._inserted_entries

    @property
    def aborted_entries(self) -> int:
        """Number of aborted entries (`0` when not reported)."""
        return 0 if self._aborted_entries is None else self._aborted_entries

    @property
    def batch_results(
        self,
    ) -> List[online_ingestion_batch_result.OnlineIngestionBatchResult]:
        """Batch results attached to this ingestion (empty list when none)."""
        return self._batch_results

    @property
    def feature_group(self) -> Optional[fg_mod.FeatureGroup]:
        """Feature group this ingestion belongs to, if set."""
        return self._feature_group

    def _is_complete(self) -> bool:
        """Return True once the processed count has reached the expected total.

        An unknown total (`num_entries is None`) is never considered complete;
        comparing an int against `None` would raise `TypeError`.
        """
        total = self.num_entries
        return total is not None and self.processed_entries >= total

    def wait_for_completion(self, options: Optional[Dict[str, Any]] = None):
        """Poll the backend until all entries are processed or a timeout expires.

        A progress bar is displayed while waiting. On timeout a warning is
        emitted and the method returns normally.

        # Arguments
            options: Optional settings. Supported keys:
                `timeout`: maximum time to wait in seconds (default `60`).
                `period`: seconds to sleep between refreshes (default `1`).
        """
        if options is None:
            options = {}

        # Set timeout time
        timeout_delta = timedelta(seconds=options.get("timeout", 60))
        timeout_time = datetime.now() + timeout_delta

        with tqdm(
            total=self.num_entries,
            bar_format="{desc}: {percentage:.2f}% |{bar}| Rows {n_fmt}/{total_fmt}",
            desc="Online data ingestion progress",
            mininterval=1,
        ) as progress_bar:
            while True:
                if self.aborted_entries:
                    progress_bar.colour = "RED"

                # The total may only become known after a refresh.
                if progress_bar.total is None and self.num_entries is not None:
                    progress_bar.total = self.num_entries

                progress_bar.n = self.processed_entries
                progress_bar.refresh()

                if self._is_complete():
                    break

                if datetime.now() >= timeout_time:
                    warnings.warn(
                        f"Timeout of {timeout_delta} was exceeded while waiting for online ingestion completion.",
                        stacklevel=1,
                    )
                    break

                time.sleep(options.get("period", 1))

                self.refresh()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# | ||
# Copyright 2024 Hopsworks AB | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
from __future__ import annotations | ||
|
||
from typing import Dict, Optional | ||
|
||
from hopsworks_common import client | ||
from hsfs import feature_group as fg_mod | ||
from hsfs.core import online_ingestion | ||
|
||
|
||
class OnlineIngestionApi:
    """REST calls for the `online_ingestion` resource of a feature group."""

    @staticmethod
    def _resource_path(_client, feature_group_instance: fg_mod.FeatureGroup) -> list:
        """Return the path segments of the feature group's `online_ingestion` endpoint."""
        return [
            "project",
            _client._project_id,
            "featurestores",
            feature_group_instance.feature_store_id,
            "featuregroups",
            feature_group_instance.id,
            "online_ingestion",
        ]

    def create_online_ingestion(
        self,
        feature_group_instance: fg_mod.FeatureGroup,
        online_ingestion_instance: online_ingestion.OnlineIngestion,
    ) -> online_ingestion.OnlineIngestion:
        """POST a new online ingestion for the given feature group.

        # Arguments
            feature_group_instance: Feature group the ingestion is created for.
            online_ingestion_instance: Ingestion metadata; its `json()` output
                is sent as the request body.

        # Returns
            The response parsed by `OnlineIngestion.from_response_json`, with
            `feature_group_instance` attached.
        """
        _client = client.get_instance()
        endpoint = self._resource_path(_client, feature_group_instance)
        json_headers = {"content-type": "application/json"}

        response = _client._send_request(
            "POST",
            endpoint,
            headers=json_headers,
            data=online_ingestion_instance.json(),
        )
        return online_ingestion.OnlineIngestion.from_response_json(
            response, feature_group=feature_group_instance
        )

    def get_online_ingestion(
        self,
        feature_group_instance: fg_mod.FeatureGroup,
        query_params: Optional[Dict[str, str]] = None,
    ) -> online_ingestion.OnlineIngestion:
        """GET online ingestion metadata for the given feature group.

        # Arguments
            feature_group_instance: Feature group whose ingestions are queried.
            query_params: Optional query parameters forwarded with the request.

        # Returns
            The response parsed by `OnlineIngestion.from_response_json`, with
            `feature_group_instance` attached.
        """
        _client = client.get_instance()
        endpoint = self._resource_path(_client, feature_group_instance)

        response = _client._send_request("GET", endpoint, query_params)
        return online_ingestion.OnlineIngestion.from_response_json(
            response, feature_group=feature_group_instance
        )
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it make sense to provide a default value here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, I don't think so, since the number of entries directly depends on the dataframe size.