Skip to content

Commit

Permalink
stats representation via plotly in to_viz interface (#335)
Browse files Browse the repository at this point in the history
  • Loading branch information
KshitizLohia authored Sep 21, 2023
2 parents 05d5032 + 3de6cd3 commit 754e964
Show file tree
Hide file tree
Showing 20 changed files with 592 additions and 41 deletions.
6 changes: 6 additions & 0 deletions THIRD_PARTY_LICENSES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,12 @@ pandavro
* Source code: https://github.com/ynqa/pandavro
* Project home: https://github.com/ynqa/pandavro

plotly
* Copyright (c) 2016-2018 Plotly, Inc
* License: MIT License
* Source code: https://github.com/plotly/plotly.py
* Project home: https://plotly.com/

protobuf
* Copyright 2008 Google Inc. All rights reserved.
* License: Google Protobuf License
Expand Down
18 changes: 10 additions & 8 deletions ads/feature_store/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from ads.feature_store.feature_group_expectation import Expectation
from ads.feature_store.feature_option_details import FeatureOptionDetails
from ads.feature_store.service.oci_dataset import OCIDataset
from ads.feature_store.statistics import Statistics
from ads.feature_store.statistics.statistics import Statistics
from ads.feature_store.statistics_config import StatisticsConfig
from ads.feature_store.service.oci_lineage import OCILineage
from ads.feature_store.model_details import ModelDetails
Expand Down Expand Up @@ -164,7 +164,7 @@ def __init__(self, spec: Dict = None, **kwargs) -> None:
self.oci_dataset = self._to_oci_dataset(**kwargs)
self.lineage = OCILineage(**kwargs)

def _to_oci_dataset(self, **kwargs):
def _to_oci_dataset(self, **kwargs) -> OCIDataset:
"""Creates an `OCIDataset` instance from the `Dataset`.
kwargs
Expand Down Expand Up @@ -235,8 +235,8 @@ def name(self) -> str:
return self.get_spec(self.CONST_NAME)

@name.setter
def name(self, name: str) -> "Dataset":
return self.with_name(name)
def name(self, name: str):
self.with_name(name)

def with_name(self, name: str) -> "Dataset":
"""Sets the name.
Expand Down Expand Up @@ -1207,12 +1207,14 @@ def to_dict(self) -> Dict:
for key, value in spec.items():
if hasattr(value, "to_dict"):
value = value.to_dict()
if hasattr(value, "attribute_map"):
value = self.oci_dataset.client.base_client.sanitize_for_serialization(
if key == self.CONST_FEATURE_GROUP:
spec[
key
] = self.oci_dataset.client.base_client.sanitize_for_serialization(
value
)
spec[key] = value

else:
spec[key] = value
return {
"kind": self.kind,
"type": self.type,
Expand Down
2 changes: 1 addition & 1 deletion ads/feature_store/docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ help:
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

Expand Down
8 changes: 8 additions & 0 deletions ads/feature_store/docs/source/dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,14 @@ The ``get_statistics()`` method takes the following optional parameter:
.. image:: figures/dataset_statistics.png

.. code-block:: python3
# Fetch and visualize stats for a dataset job
df = dataset.get_statistics(job_id).to_viz()
.. image:: figures/dataset_statistics_viz.png


.. seealso::

:ref:`Statistics`
Expand Down
7 changes: 7 additions & 0 deletions ads/feature_store/docs/source/feature_group.rst
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ The ``get_statistics()`` method takes the following optional parameter:
.. image:: figures/stats_1.png

.. code-block:: python3
# Fetch and visualize stats for a feature group job
df = feature_group.get_statistics(job_id).to_viz()
.. image:: figures/feature_group_statistics_viz.png

.. seealso::

:ref:`Statistics`
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 4 additions & 6 deletions ads/feature_store/feature_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from ads.feature_store.service.oci_feature_group import OCIFeatureGroup
from ads.feature_store.service.oci_feature_group_job import OCIFeatureGroupJob
from ads.feature_store.service.oci_lineage import OCILineage
from ads.feature_store.statistics import Statistics
from ads.feature_store.statistics.statistics import Statistics
from ads.feature_store.statistics_config import StatisticsConfig
from ads.feature_store.validation_output import ValidationOutput

Expand Down Expand Up @@ -244,8 +244,8 @@ def name(self) -> str:
return self.get_spec(self.CONST_NAME)

@name.setter
def name(self, name: str) -> "FeatureGroup":
return self.with_name(name)
def name(self, name: str):
self.with_name(name)

def with_name(self, name: str) -> "FeatureGroup":
"""Sets the name.
Expand Down Expand Up @@ -338,7 +338,7 @@ def transformation_kwargs(self, value: Dict):
self.with_transformation_kwargs(value)

def with_transformation_kwargs(
self, transformation_kwargs: Dict = {}
self, transformation_kwargs: Dict = ()
) -> "FeatureGroup":
"""Sets the primary keys of the feature group.
Expand Down Expand Up @@ -604,7 +604,6 @@ def with_statistics_config(
FeatureGroup
The FeatureGroup instance (self).
"""
statistics_config_in = None
if isinstance(statistics_config, StatisticsConfig):
statistics_config_in = statistics_config
elif isinstance(statistics_config, bool):
Expand Down Expand Up @@ -1108,7 +1107,6 @@ def restore(self, version_number: int = None, timestamp: datetime = None):
f"RESTORE TABLE {target_table} TO VERSION AS OF {version_number}"
)
else:
iso_timestamp = timestamp.isoformat(" ", "seconds").__str__()
sql_query = f"RESTORE TABLE {target_table} TO TIMESTAMP AS OF {timestamp}"

restore_output = self.spark_engine.sql(sql_query)
Expand Down
25 changes: 0 additions & 25 deletions ads/feature_store/statistics.py

This file was deleted.

Empty file.
Empty file.
59 changes: 59 additions & 0 deletions ads/feature_store/statistics/charts/abstract_feature_stat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python
# -*- coding: utf-8; -*-
# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from abc import abstractmethod
from typing import Tuple, Union

from ads.common.decorator.runtime_dependency import OptionalDependency

try:
from plotly.graph_objs import Figure
except ModuleNotFoundError:
raise ModuleNotFoundError(
f"The `plotly` module was not found. Please run `pip install "
f"{OptionalDependency.FEATURE_STORE}`."
)


class AbsFeatureStat:
    """Base class for a single feature statistic drawable on a plotly figure.

    Subclasses provide ``__validate__``, ``add_to_figure`` and
    ``__from_json__``. ``__init__`` calls ``__validate__``, so a subclass must
    assign its attributes before invoking ``super().__init__()``.

    NOTE(review): this class does not derive from ``abc.ABC``, so the
    ``@abstractmethod`` markers below are not enforced at instantiation time.
    """

    class ValidationFailedException(Exception):
        """Raised when a statistic's data does not pass ``__validate__``."""

        def __init__(self, message: str = "Feature statistic validation failed."):
            # Forward a message so the exception is not empty when logged.
            super().__init__(message)

    def __init__(self):
        self.__validate__()

    @abstractmethod
    def __validate__(self):
        """Check the instance's data; subclasses raise on invalid state."""
        pass

    @abstractmethod
    def add_to_figure(self, fig: "Figure", xaxis: int, yaxis: int):
        """Draw this statistic on ``fig`` at the given zero-based axis indices."""
        pass

    @classmethod
    @abstractmethod
    def __from_json__(cls, json_dict: dict):
        """Build an instance from ``json_dict``; implemented by subclasses."""
        pass

    @staticmethod
    def get_x_y_str_axes(xaxis: int, yaxis: int) -> Tuple[str, str, str, str]:
        """Return plotly axis names for zero-based axis indices.

        Parameters
        ----------
        xaxis: int
            Zero-based x-axis index.
        yaxis: int
            Zero-based y-axis index.

        Returns
        -------
        Tuple[str, str, str, str]
            ``("xaxis<N>", "yaxis<M>", "x<N>", "y<M>")`` where ``N = xaxis + 1``
            and ``M = yaxis + 1``.
        """
        x_number = xaxis + 1
        y_number = yaxis + 1
        return (
            f"xaxis{x_number}",
            f"yaxis{y_number}",
            f"x{x_number}",
            f"y{y_number}",
        )

    @classmethod
    def from_json(
        cls, json_dict: dict, ignore_errors: bool = False
    ) -> Union["AbsFeatureStat", None]:
        """Build an instance from ``json_dict`` via ``__from_json__``.

        Parameters
        ----------
        json_dict: dict
            Payload handed to ``__from_json__``.
        ignore_errors: bool
            When True, any exception raised while building the instance is
            swallowed and ``None`` is returned instead.

        Returns
        -------
        Union[AbsFeatureStat, None]
            The constructed statistic, or ``None`` if construction failed and
            ``ignore_errors`` is True.
        """
        try:
            return cls.__from_json__(json_dict=json_dict)
        except Exception:
            if ignore_errors:
                return None
            # Bare raise keeps the original traceback intact.
            raise
130 changes: 130 additions & 0 deletions ads/feature_store/statistics/charts/box_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env python
# -*- coding: utf-8; -*-
# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from typing import List

from ads.common.decorator.runtime_dependency import OptionalDependency
from ads.feature_store.statistics.charts.abstract_feature_stat import AbsFeatureStat
from ads.feature_store.statistics.charts.frequency_distribution import (
FrequencyDistribution,
)
from ads.feature_store.statistics.generic_feature_value import GenericFeatureValue

try:
from plotly.graph_objs import Figure
except ModuleNotFoundError:
raise ModuleNotFoundError(
f"The `plotly` module was not found. Please run `pip install "
f"{OptionalDependency.FEATURE_STORE}`."
)


class BoxPlot(AbsFeatureStat):
    """Box-plot statistic of one feature, drawn as a plotly box trace.

    The box is described by precomputed values (mean, median, quartiles,
    standard deviation) plus a list of sample points. The inter-quartile
    range is derived as ``q3 - q1``.
    """

    CONST_MIN = "Min"
    CONST_MAX = "Max"
    CONST_QUARTILES = "Quartiles"
    CONST_SD = "StandardDeviation"
    CONST_MEAN = "Mean"
    CONST_BOX_PLOT_TITLE = "Box Plot"
    CONST_IQR = "IQR"
    CONST_BOX_POINTS = "box_points"

    class Quartiles:
        """Holder for the three quartile values read from a JSON payload."""

        CONST_Q1 = "q1"
        CONST_Q2 = "q2"
        CONST_Q3 = "q3"

        def __init__(self, q1: float, q2: float, q3: float):
            self.q1 = q1
            self.q2 = q2
            self.q3 = q3

        @classmethod
        def from_json(cls, json_dict: dict) -> "BoxPlot.Quartiles":
            """Read ``q1``/``q2``/``q3`` from ``json_dict``; absent keys become None."""
            return cls(
                json_dict.get(cls.CONST_Q1),
                json_dict.get(cls.CONST_Q2),
                json_dict.get(cls.CONST_Q3),
            )

    def __init__(
        self,
        mean: float,
        median: float,
        sd: float,
        q1: float,
        q3: float,
        box_points: List[float],
    ):
        """Store the box-plot values and validate them.

        Raises
        ------
        BoxPlot.ValidationFailedException
            If ``q1`` or ``q3`` is None, or ``box_points`` is not a non-empty
            list.
        """
        self.mean = mean
        self.median = median
        self.q1 = q1
        self.q3 = q3
        self.sd = sd
        # Guard the subtraction so a missing quartile is reported by
        # __validate__ rather than surfacing as a TypeError here.
        self.iqr = None if q1 is None or q3 is None else q3 - q1
        self.box_points = box_points
        super().__init__()

    def __validate__(self):
        """Raise ``ValidationFailedException`` when the box cannot be drawn."""
        if (
            self.q1 is None
            or self.q3 is None
            or self.iqr is None
            or not isinstance(self.box_points, list)
            or not self.box_points
        ):
            # The exception must be raised, not returned, for validation to
            # have any effect on callers.
            raise self.ValidationFailedException()

    def add_to_figure(self, fig: Figure, xaxis: int, yaxis: int):
        """Add this box as a trace on ``fig`` and label its subplot.

        Parameters
        ----------
        fig: Figure
            Figure to draw on. Its ``layout.annotations[xaxis]`` entry is
            overwritten with the box-plot title, so it must already exist.
        xaxis: int
            Zero-based x-axis index of the target subplot.
        yaxis: int
            Zero-based y-axis index of the target subplot.
        """
        xaxis_str, yaxis_str, x_str, y_str = self.get_x_y_str_axes(xaxis, yaxis)
        # Fences sit 1.5 * IQR beyond the first and third quartiles.
        fence_reach = 1.5 * self.iqr
        fig.add_box(
            notched=False,
            boxmean=False,
            mean=[self.mean],
            median=[self.median],
            q1=[self.q1],
            q3=[self.q3],
            sd=[self.sd],
            y=[self.box_points],
            upperfence=[self.q3 + fence_reach],
            lowerfence=[self.q1 - fence_reach],
            xaxis=x_str,
            yaxis=y_str,
            name="",
            jitter=0,
        )
        fig.layout.annotations[xaxis].text = self.CONST_BOX_PLOT_TITLE
        fig.layout[yaxis_str]["title"] = "Values"

    @staticmethod
    def get_box_points_from_frequency_distribution(
        frequency_distribution: FrequencyDistribution,
    ) -> List[float]:
        """Return the bins of ``frequency_distribution`` whose frequency is positive.

        Returns an empty list when the distribution, its ``frequency`` or its
        ``bins`` is None.
        """
        if (
            frequency_distribution is None
            or frequency_distribution.frequency is None
            or frequency_distribution.bins is None
        ):
            return []
        return [
            bin_dist
            for frequency, bin_dist in zip(
                frequency_distribution.frequency, frequency_distribution.bins
            )
            if frequency > 0
        ]

    @classmethod
    def __from_json__(cls, json_dict: dict) -> "BoxPlot":
        """Build a ``BoxPlot`` from a statistics JSON payload.

        Reads the ``Quartiles``, ``Mean``, ``StandardDeviation`` and
        ``box_points`` entries of ``json_dict``; the median is taken from
        the second quartile.
        """
        quartiles = cls.Quartiles.from_json(json_dict.get(cls.CONST_QUARTILES))
        return cls(
            mean=GenericFeatureValue.from_json(json_dict.get(cls.CONST_MEAN)).val,
            median=quartiles.q2,
            sd=GenericFeatureValue.from_json(json_dict.get(cls.CONST_SD)).val,
            q1=quartiles.q1,
            q3=quartiles.q3,
            box_points=json_dict.get(cls.CONST_BOX_POINTS),
        )
Loading

0 comments on commit 754e964

Please sign in to comment.