Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…ence into feature/forecasting
  • Loading branch information
mrDzurb committed Sep 28, 2023
2 parents 997d27f + fa6f315 commit ac932aa
Show file tree
Hide file tree
Showing 29 changed files with 444 additions and 142 deletions.
1 change: 1 addition & 0 deletions .gitleaks.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ useDefault = true
'''example-password''',
'''this-is-not-the-secret''',
'''<redacted>''',
'''security_token''',
# NVIDIA_GPGKEY_SUM from public documentation:
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/base/Dockerfile
'''d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87'''
Expand Down
2 changes: 1 addition & 1 deletion README-development.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ Open the destination folder where you want to clone ADS library, and install dep
python3 -m pip install -e .
```

To which packages were installed and their version numbers, run:
To view which packages were installed and their version numbers, run:

```bash
python3 -m pip freeze
Expand Down
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ security features are welcome on GitHub Issues.

Security updates will be released on a regular cadence. Many of our projects
will typically release security fixes in conjunction with the
[Oracle Critical Patch Update][3] program. Additional
Oracle Critical Patch Update program. Additional
information, including past advisories, is available on our [security alerts][4]
page.

Expand Down
14 changes: 9 additions & 5 deletions ads/dataset/sampled_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@
OptionalDependency,
)

NATURAL_EARTH_DATASET = "naturalearth_lowres"

class PandasDataset(object):
"""
This class provides APIs that can work on a sampled dataset.
"""

@runtime_dependency(module="geopandas", install_from=OptionalDependency.GEO)
def __init__(
self,
sampled_df,
Expand All @@ -67,9 +67,7 @@ def __init__(
self.correlation = None
self.feature_dist_html_dict = {}
self.feature_types = metadata if metadata is not None else {}
self.world = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres")
)
self.world = None

self.numeric_columns = self.sampled_df.select_dtypes(
utils.numeric_pandas_dtypes()
Expand Down Expand Up @@ -562,7 +560,7 @@ def plot_gis_scatter(self, lon="longitude", lat="latitude", ax=None):
),
)
world = geopandas.read_file(
geopandas.datasets.get_path("naturalearth_lowres")
geopandas.datasets.get_path(NATURAL_EARTH_DATASET)
)
ax1 = world.plot(ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white")
gdf.plot(ax=ax1, color="blue", markersize=10)
Expand Down Expand Up @@ -706,6 +704,12 @@ def _visualize_feature_distribution(self, html_widget):
gdf = geopandas.GeoDataFrame(
df, geometry=geopandas.points_from_xy(df["lon"], df["lat"])
)

if not self.world:
self.world = geopandas.read_file(
geopandas.datasets.get_path(NATURAL_EARTH_DATASET)
)

self.world.plot(
ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white"
)
Expand Down
116 changes: 69 additions & 47 deletions ads/model/artifact_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import Dict, Optional

from ads.common import utils
from ads.common.object_storage_details import ObjectStorageDetails
from ads.model.common import utils as model_utils
from ads.model.service.oci_datascience_model import OCIDataScienceModel

Expand All @@ -29,7 +30,10 @@ def __init__(self, dsc_model: OCIDataScienceModel, artifact_path: str):
artifact_path: str
The model artifact location.
"""
if not os.path.exists(artifact_path):
if not (
ObjectStorageDetails.is_oci_path(artifact_path)
or os.path.exists(artifact_path)
):
raise ValueError(f"The `{artifact_path}` does not exist")

self.dsc_model = dsc_model
Expand All @@ -45,7 +49,7 @@ def upload(self):
) as progress:
self.progress = progress
self.progress.update("Preparing model artifacts ZIP archive.")
self._prepare_artiact_tmp_zip()
self._prepare_artifact_tmp_zip()
self.progress.update("Uploading model artifacts.")
self._upload()
self.progress.update(
Expand All @@ -55,22 +59,19 @@ def upload(self):
except Exception:
raise
finally:
self._remove_artiact_tmp_zip()
self._remove_artifact_tmp_zip()

def _prepare_artiact_tmp_zip(self) -> str:
def _prepare_artifact_tmp_zip(self) -> str:
"""Prepares model artifacts ZIP archive.
Parameters
----------
progress: (TqdmProgressBar, optional). Defaults to `None`.
The progress indicator.
Returns
-------
str
Path to the model artifact ZIP archive.
"""
if os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
if ObjectStorageDetails.is_oci_path(self.artifact_path):
self.artifact_zip_path = self.artifact_path
elif os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
".zip"
):
self.artifact_zip_path = self.artifact_path
Expand All @@ -80,7 +81,7 @@ def _prepare_artiact_tmp_zip(self) -> str:
)
return self.artifact_zip_path

def _remove_artiact_tmp_zip(self):
def _remove_artifact_tmp_zip(self):
"""Removes temporary created artifact zip archive."""
if (
self.artifact_zip_path
Expand Down Expand Up @@ -112,7 +113,10 @@ class LargeArtifactUploader(ArtifactUploader):
Attributes
----------
artifact_path: str
The model artifact location.
The model artifact location. Possible values are:
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
- local path to zip archive. Example: `./mymodel.zip`.
- local path to folder with artifacts. Example: `./mymodel`.
artifact_zip_path: str
The uri of the zip of model artifact.
auth: dict
Expand All @@ -124,6 +128,8 @@ class LargeArtifactUploader(ArtifactUploader):
The OCI Object Storage URI where model artifacts will be copied to.
The `bucket_uri` is only necessary for uploading large artifacts which
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
.. versionadded:: 2.8.10
If artifact_path is object storage path to a zip archive, bucket_uri will be ignored.
dsc_model: OCIDataScienceModel
The data science model instance.
overwrite_existing_artifact: bool
Expand All @@ -145,7 +151,7 @@ def __init__(
self,
dsc_model: OCIDataScienceModel,
artifact_path: str,
bucket_uri: str,
bucket_uri: str = None,
auth: Optional[Dict] = None,
region: Optional[str] = None,
overwrite_existing_artifact: Optional[bool] = True,
Expand All @@ -159,11 +165,16 @@ def __init__(
dsc_model: OCIDataScienceModel
The data science model instance.
artifact_path: str
The model artifact location.
bucket_uri: str
The model artifact location. Possible values are:
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
- local path to zip archive. Example: `./mymodel.zip`.
- local path to folder with artifacts. Example: `./mymodel`.
bucket_uri: (str, optional). Defaults to `None`.
The OCI Object Storage URI where model artifacts will be copied to.
The `bucket_uri` is only necessary for uploading large artifacts which
The `bucket_uri` is only necessary for uploading large artifacts from local which
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
.. versionadded:: 2.8.10
If `artifact_path` is object storage path to a zip archive, `bucket_uri` will be ignored.
auth: (Dict, optional). Defaults to `None`.
The default authentication is set using `ads.set_auth` API.
If you need to override the default, use the `ads.common.auth.api_keys` or
Expand All @@ -179,11 +190,22 @@ def __init__(
parallel_process_count: (int, optional).
The number of worker processes to use in parallel for uploading individual parts of a multipart upload.
"""
self.auth = auth or dsc_model.auth
if ObjectStorageDetails.is_oci_path(artifact_path):
if not artifact_path.endswith(".zip"):
raise ValueError(
f"The `artifact_path={artifact_path}` is invalid."
"The remote path for model artifact should be a zip archive, "
"e.g. `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`."
)
if not utils.is_path_exists(uri=artifact_path, auth=self.auth):
raise ValueError(f"The `{artifact_path}` does not exist.")
bucket_uri = artifact_path

if not bucket_uri:
raise ValueError("The `bucket_uri` must be provided.")

super().__init__(dsc_model=dsc_model, artifact_path=artifact_path)
self.auth = auth or dsc_model.auth
self.region = region or utils.extract_region(self.auth)
self.bucket_uri = bucket_uri
self.overwrite_existing_artifact = overwrite_existing_artifact
Expand All @@ -192,38 +214,38 @@ def __init__(

def _upload(self):
"""Uploads model artifacts to the model catalog."""
self.progress.update("Copying model artifact to the Object Storage bucket")

bucket_uri = self.bucket_uri
bucket_uri_file_name = os.path.basename(bucket_uri)

if not bucket_uri_file_name:
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
elif not bucket_uri.lower().endswith(".zip"):
bucket_uri = f"{bucket_uri}.zip"

if not self.overwrite_existing_artifact and utils.is_path_exists(
uri=bucket_uri, auth=self.auth
):
raise FileExistsError(
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
)
self.progress.update("Copying model artifact to the Object Storage bucket")
if not bucket_uri == self.artifact_zip_path:
bucket_uri_file_name = os.path.basename(bucket_uri)

if not bucket_uri_file_name:
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
elif not bucket_uri.lower().endswith(".zip"):
bucket_uri = f"{bucket_uri}.zip"

if not self.overwrite_existing_artifact and utils.is_path_exists(
uri=bucket_uri, auth=self.auth
):
raise FileExistsError(
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
)

try:
utils.upload_to_os(
src_uri=self.artifact_zip_path,
dst_uri=bucket_uri,
auth=self.auth,
parallel_process_count=self._parallel_process_count,
force_overwrite=self.overwrite_existing_artifact,
progressbar_description="Copying model artifact to the Object Storage bucket.",
)
except Exception as ex:
raise RuntimeError(
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
f"See Exception: {ex}"
)
try:
utils.upload_to_os(
src_uri=self.artifact_zip_path,
dst_uri=bucket_uri,
auth=self.auth,
parallel_process_count=self._parallel_process_count,
force_overwrite=self.overwrite_existing_artifact,
progressbar_description="Copying model artifact to the Object Storage bucket.",
)
except Exception as ex:
raise RuntimeError(
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
f"See Exception: {ex}"
)

self.progress.update("Exporting model artifact to the model catalog")
self.dsc_model.export_model_artifact(bucket_uri=bucket_uri, region=self.region)
Expand Down
12 changes: 12 additions & 0 deletions ads/model/datascience_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pandas
from ads.common import utils
from ads.common.object_storage_details import ObjectStorageDetails
from ads.config import COMPARTMENT_OCID, PROJECT_OCID
from ads.feature_engineering.schema import Schema
from ads.jobs.builders.base import Builder
Expand Down Expand Up @@ -548,6 +549,8 @@ def create(self, **kwargs) -> "DataScienceModel":
The OCI Object Storage URI where model artifacts will be copied to.
The `bucket_uri` is only necessary for uploading large artifacts which
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
.. versionadded:: 2.8.10
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
overwrite_existing_artifact: (bool, optional). Defaults to `True`.
Overwrite target bucket artifact if exists.
remove_existing_artifact: (bool, optional). Defaults to `True`.
Expand Down Expand Up @@ -636,6 +639,8 @@ def upload_artifact(
The OCI Object Storage URI where model artifacts will be copied to.
The `bucket_uri` is only necessary for uploading large artifacts which
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
.. versionadded:: 2.8.10
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
auth: (Dict, optional). Defaults to `None`.
The default authentication is set using `ads.set_auth` API.
If you need to override the default, use the `ads.common.auth.api_keys` or
Expand Down Expand Up @@ -668,6 +673,13 @@ def upload_artifact(
"timeout": timeout,
}

if ObjectStorageDetails.is_oci_path(self.artifact):
if bucket_uri and bucket_uri != self.artifact:
logger.warn(
"The `bucket_uri` will be ignored and the value of `self.artifact` will be used instead."
)
bucket_uri = self.artifact

if bucket_uri or utils.folder_size(self.artifact) > _MAX_ARTIFACT_SIZE_IN_BYTES:
if not bucket_uri:
raise ModelArtifactSizeError(
Expand Down
Loading

0 comments on commit ac932aa

Please sign in to comment.