Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/feature/autogen' into feature/au…
Browse files Browse the repository at this point in the history
…togen
  • Loading branch information
qiuosier committed Nov 13, 2024
2 parents 25083b8 + 8e87aab commit 561bb88
Show file tree
Hide file tree
Showing 37 changed files with 552 additions and 431 deletions.
10 changes: 10 additions & 0 deletions ads/aqua/common/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,16 @@ def inner_function(
reason=error.message,
service_payload=error.args[0] if error.args else None,
exc_info=sys.exc_info(),
aqua_api_details=dict(
# __qualname__ provides the class name and the name of the API
aqua_api_name=func.__qualname__,
oci_api_name=getattr(
error, "operation_name", "Unknown OCI Operation"
),
service_endpoint=getattr(
error, "request_endpoint", "Unknown Request Endpoint"
)
)
)
except (
ClientError,
Expand Down
5 changes: 4 additions & 1 deletion ads/aqua/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,13 +788,14 @@ def get_ocid_substring(ocid: str, key_len: int) -> str:
return ocid[-key_len:] if ocid and len(ocid) > key_len else ""


def upload_folder(os_path: str, local_dir: str, model_name: str) -> str:
def upload_folder(os_path: str, local_dir: str, model_name: str, exclude_pattern: str = None) -> str:
"""Upload the local folder to the object storage
Args:
os_path (str): object storage URI with prefix. This is the path to upload
local_dir (str): Local directory where the object is downloaded
model_name (str): Name of the huggingface model
exclude_pattern (optional, str): The matching pattern of files to be excluded from uploading.
Returns:
str: Object name inside the bucket
"""
Expand All @@ -804,6 +805,8 @@ def upload_folder(os_path: str, local_dir: str, model_name: str) -> str:
auth_state = AuthState()
object_path = os_details.filepath.rstrip("/") + "/" + model_name + "/"
command = f"oci os object bulk-upload --src-dir {local_dir} --prefix {object_path} -bn {os_details.bucket} -ns {os_details.namespace} --auth {auth_state.oci_iam_type} --profile {auth_state.oci_key_profile} --no-overwrite"
if exclude_pattern:
command += f" --exclude {exclude_pattern}"
try:
logger.info(f"Running: {command}")
subprocess.check_call(shlex.split(command))
Expand Down
1 change: 1 addition & 0 deletions ads/aqua/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME = "_name_or_path"
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE = "model_type"
AQUA_MODEL_ARTIFACT_FILE = "model_file"
HF_METADATA_FOLDER = ".cache/"
HF_LOGIN_DEFAULT_TIMEOUT = 2

TRAINING_METRICS_FINAL = "training_metrics_final"
Expand Down
14 changes: 12 additions & 2 deletions ads/aqua/evaluation/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
This module contains dataclasses for aqua evaluation.
"""

from typing import Any, Dict, List, Optional

from pydantic import Field
from typing import Any, Dict, List, Optional, Union

from ads.aqua.data import AquaResourceIdentifier
from ads.aqua.config.utils.serializer import Serializable
from ads.aqua.data import AquaResourceIdentifier


class CreateAquaEvaluationDetails(Serializable):
Expand Down Expand Up @@ -87,6 +88,8 @@ class CreateAquaEvaluationDetails(Serializable):

class Config:
extra = "ignore"
protected_namespaces = ()


class AquaEvalReport(Serializable):
evaluation_id: str = ""
Expand All @@ -95,6 +98,7 @@ class AquaEvalReport(Serializable):
class Config:
extra = "ignore"


class AquaEvalParams(Serializable):
shape: str = ""
dataset_path: str = ""
Expand All @@ -103,6 +107,7 @@ class AquaEvalParams(Serializable):
class Config:
extra = "allow"


class AquaEvalMetric(Serializable):
key: str
name: str
Expand All @@ -111,6 +116,7 @@ class AquaEvalMetric(Serializable):
class Config:
extra = "ignore"


class AquaEvalMetricSummary(Serializable):
metric: str = ""
score: str = ""
Expand All @@ -119,6 +125,7 @@ class AquaEvalMetricSummary(Serializable):
class Config:
extra = "ignore"


class AquaEvalMetrics(Serializable):
id: str
report: str
Expand All @@ -128,6 +135,7 @@ class AquaEvalMetrics(Serializable):
class Config:
extra = "ignore"


class AquaEvaluationCommands(Serializable):
evaluation_id: str
evaluation_target_id: str
Expand All @@ -139,6 +147,7 @@ class AquaEvaluationCommands(Serializable):
class Config:
extra = "ignore"


class AquaEvaluationSummary(Serializable):
"""Represents a summary of an Aqua evaluation."""

Expand All @@ -157,6 +166,7 @@ class AquaEvaluationSummary(Serializable):
class Config:
extra = "ignore"


class AquaEvaluationDetail(AquaEvaluationSummary):
"""Represents the details of an Aqua evaluation."""

Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/extension/aqua_ws_msg_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,12 @@ def write_error(self, status_code, **kwargs):
logger.warning(reply["message"])
# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
aqua_api_details = kwargs.get("aqua_api_details", {})
self.telemetry.record_event_async(
category="aqua/error",
action=str(status_code),
value=reason,
**aqua_api_details
)
response = AquaWsError(
status=status_code,
Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/extension/base_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,12 @@ def write_error(self, status_code, **kwargs):

# telemetry may not be present if there is an error while initializing
if hasattr(self, "telemetry"):
aqua_api_details = kwargs.get("aqua_api_details", {})
self.telemetry.record_event_async(
category="aqua/error",
action=str(status_code),
value=reason,
**aqua_api_details
)

self.finish(json.dumps(reply))
Expand Down
4 changes: 4 additions & 0 deletions ads/aqua/extension/model_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ def post(self, *args, **kwargs):
str(input_data.get("download_from_hf", "false")).lower() == "true"
)
inference_container_uri = input_data.get("inference_container_uri")
allow_patterns = input_data.get("allow_patterns")
ignore_patterns = input_data.get("ignore_patterns")

return self.finish(
AquaModelApp().register(
Expand All @@ -141,6 +143,8 @@ def post(self, *args, **kwargs):
project_id=project_id,
model_file=model_file,
inference_container_uri=inference_container_uri,
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
)
)

Expand Down
3 changes: 3 additions & 0 deletions ads/aqua/finetuning/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ class FineTuneCustomMetadata(str, metaclass=ExtendedEnumMeta):
SERVICE_MODEL_ARTIFACT_LOCATION = "artifact_location"
SERVICE_MODEL_DEPLOYMENT_CONTAINER = "deployment-container"
SERVICE_MODEL_FINE_TUNE_CONTAINER = "finetune-container"


ENV_AQUA_FINE_TUNING_CONTAINER = "AQUA_FINE_TUNING_CONTAINER"
15 changes: 13 additions & 2 deletions ads/aqua/finetuning/finetuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@
UNKNOWN_DICT,
)
from ads.aqua.data import AquaResourceIdentifier
from ads.aqua.finetuning.constants import *
from ads.aqua.finetuning.constants import (
ENV_AQUA_FINE_TUNING_CONTAINER,
FineTuneCustomMetadata,
)
from ads.aqua.finetuning.entities import *
from ads.common.auth import default_signer
from ads.common.object_storage_details import ObjectStorageDetails
Expand Down Expand Up @@ -310,6 +313,15 @@ def create(
except Exception:
pass

if not is_custom_container and ENV_AQUA_FINE_TUNING_CONTAINER in os.environ:
ft_container = os.environ[ENV_AQUA_FINE_TUNING_CONTAINER]
logger.info(
"Using container set by environment variable %s=%s",
ENV_AQUA_FINE_TUNING_CONTAINER,
ft_container,
)
is_custom_container = True

ft_parameters.batch_size = ft_parameters.batch_size or (
ft_config.get("shape", UNKNOWN_DICT)
.get(create_fine_tuning_details.shape_name, UNKNOWN_DICT)
Expand Down Expand Up @@ -559,7 +571,6 @@ def get_finetuning_config(self, model_id: str) -> Dict:
Dict:
A dict of allowed finetuning configs.
"""

config = self.get_config(model_id, AQUA_MODEL_FINETUNING_CONFIG)
if not config:
logger.debug(
Expand Down
2 changes: 2 additions & 0 deletions ads/aqua/model/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,8 @@ class ImportModelDetails(CLIBuilderMixin):
project_id: Optional[str] = None
model_file: Optional[str] = None
inference_container_uri: Optional[str] = None
allow_patterns: Optional[List[str]] = None
ignore_patterns: Optional[List[str]] = None

def __post_init__(self):
self._command = "model register"
44 changes: 25 additions & 19 deletions ads/aqua/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE,
AQUA_MODEL_ARTIFACT_FILE,
AQUA_MODEL_TYPE_CUSTOM,
HF_METADATA_FOLDER,
LICENSE_TXT,
MODEL_BY_REFERENCE_OSS_PATH_KEY,
README,
Expand Down Expand Up @@ -1274,6 +1275,8 @@ def _download_model_from_hf(
model_name: str,
os_path: str,
local_dir: str = None,
allow_patterns: List[str] = None,
ignore_patterns: List[str] = None,
) -> str:
"""This helper function downloads the model artifact from Hugging Face to a local folder, then uploads
to object storage location.
Expand All @@ -1283,6 +1286,12 @@ def _download_model_from_hf(
model_name (str): The huggingface model name.
os_path (str): The OS path where the model files are located.
local_dir (str): The local temp dir to store the huggingface model.
allow_patterns (list): Model files matching at least one pattern are downloaded.
Example: ["*.json"] will download all .json files. ["folder/*"] will download all files under `folder`.
Patterns are Standard Wildcards (globbing patterns) and rules can be found here: https://docs.python.org/3/library/fnmatch.html
ignore_patterns (list): Model files matching any of the patterns are not downloaded.
Example: ["*.json"] will ignore all .json files. ["folder/*"] will ignore all files under `folder`.
Patterns are Standard Wildcards (globbing patterns) and rules can be found here: https://docs.python.org/3/library/fnmatch.html
Returns
-------
Expand All @@ -1293,30 +1302,19 @@ def _download_model_from_hf(
if not local_dir:
local_dir = os.path.join(os.path.expanduser("~"), "cached-model")
local_dir = os.path.join(local_dir, model_name)
retry = 10
i = 0
huggingface_download_err_message = None
while i < retry:
try:
# Download to cache folder. The while loop retries when there is a network failure
snapshot_download(repo_id=model_name)
except Exception as e:
huggingface_download_err_message = str(e)
i += 1
else:
break
if i == retry:
raise Exception(
f"Could not download the model {model_name} from https://huggingface.co with message {huggingface_download_err_message}"
)
os.makedirs(local_dir, exist_ok=True)
# Copy the model from the cache to destination
snapshot_download(repo_id=model_name, local_dir=local_dir)
# Upload to object storage
snapshot_download(
repo_id=model_name,
local_dir=local_dir,
allow_patterns=allow_patterns,
ignore_patterns=ignore_patterns,
)
# Upload to object storage and skip .cache/huggingface/ folder
model_artifact_path = upload_folder(
os_path=os_path,
local_dir=local_dir,
model_name=model_name,
exclude_pattern=f"{HF_METADATA_FOLDER}*"
)

return model_artifact_path
Expand All @@ -1335,6 +1333,12 @@ def register(
os_path (str): Object storage destination URI to store the downloaded model. Format: oci://bucket-name@namespace/prefix
inference_container (str): selects service defaults
finetuning_container (str): selects service defaults
allow_patterns (list): Model files matching at least one pattern are downloaded.
Example: ["*.json"] will download all .json files. ["folder/*"] will download all files under `folder`.
Patterns are Standard Wildcards (globbing patterns) and rules can be found here: https://docs.python.org/3/library/fnmatch.html
ignore_patterns (list): Model files matching any of the patterns are not downloaded.
Example: ["*.json"] will ignore all .json files. ["folder/*"] will ignore all files under `folder`.
Patterns are Standard Wildcards (globbing patterns) and rules can be found here: https://docs.python.org/3/library/fnmatch.html
Returns:
AquaModel:
Expand Down Expand Up @@ -1381,6 +1385,8 @@ def register(
model_name=model_name,
os_path=import_model_details.os_path,
local_dir=import_model_details.local_dir,
allow_patterns=import_model_details.allow_patterns,
ignore_patterns=import_model_details.ignore_patterns,
).rstrip("/")
else:
artifact_path = import_model_details.os_path.rstrip("/")
Expand Down
11 changes: 6 additions & 5 deletions ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import importlib
import logging

import numpy as np
import pandas as pd
import report_creator as rc
from merlion.post_process.threshold import AggregateAlarms
from merlion.utils import TimeSeries

Expand All @@ -21,6 +23,8 @@
from .anomaly_dataset import AnomalyOutput
from .base_model import AnomalyOperatorBaseModel

logging.getLogger("report_creator").setLevel(logging.WARNING)


class AnomalyMerlionOperatorModel(AnomalyOperatorBaseModel):
"""Class representing Merlion Anomaly Detection operator model."""
Expand Down Expand Up @@ -84,7 +88,7 @@ def _build_model(self) -> AnomalyOutput:
for target, df in self.datasets.full_data_dict.items():
data = df.set_index(date_column)
data = TimeSeries.from_pd(data)
for model_name, (model_config, model) in model_config_map.items():
for _, (model_config, model) in model_config_map.items():
if self.spec.model == SupportedModels.BOCPD:
model_config = model_config(**self.spec.model_kwargs)
else:
Expand Down Expand Up @@ -115,7 +119,7 @@ def _build_model(self) -> AnomalyOutput:
y_pred = (y_pred.to_pd().reset_index()["anom_score"] > 0).astype(
int
)
except Exception as e:
except Exception:
y_pred = (
scores["anom_score"]
> np.percentile(
Expand All @@ -135,15 +139,12 @@ def _build_model(self) -> AnomalyOutput:
OutputColumns.SCORE_COL: scores["anom_score"],
}
).reset_index(drop=True)
# model_objects[model_name].append(model)

anomaly_output.add_output(target, anomaly, score)
return anomaly_output

def _generate_report(self):
"""Generates a report for the model."""
import report_creator as rc

other_sections = [
rc.Heading("Selected Models Overview", level=2),
rc.Text(
Expand Down
Loading

0 comments on commit 561bb88

Please sign in to comment.