From 2ad8034717d6338e1699ddab6c5bcc9ecdeb8b4e Mon Sep 17 00:00:00 2001 From: Oleksandra Pavlusieva Date: Tue, 29 Aug 2023 11:52:28 +0300 Subject: [PATCH 01/17] Delete build_spec.yaml --- build_spec.yaml | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 build_spec.yaml diff --git a/build_spec.yaml b/build_spec.yaml deleted file mode 100644 index c267d2abe..000000000 --- a/build_spec.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2023, 2022, Oracle and/or its affiliates. - -version: 0.1 -component: build -timeoutInSeconds: 1000 -shell: bash - -steps: - - type: Command - name: "compress the repo" - command: | - tar -cvzf ${OCI_WORKSPACE_DIR}/repo.tgz ./ -outputArtifacts: - - name: artifact - type: BINARY - location: ${OCI_WORKSPACE_DIR}/repo.tgz From d05975cce8476b0e4a72c2a24efa7e4b9b2f7d83 Mon Sep 17 00:00:00 2001 From: Oleksandra Pavlusieva Date: Tue, 29 Aug 2023 11:53:21 +0300 Subject: [PATCH 02/17] Update SECURITY.md --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index fb2384138..2ca81027f 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -21,7 +21,7 @@ security features are welcome on GitHub Issues. Security updates will be released on a regular cadence. Many of our projects will typically release security fixes in conjunction with the -[Oracle Critical Patch Update][3] program. Additional +Oracle Critical Patch Update program. Additional information, including past advisories, is available on our [security alerts][4] page. From 764d8fc48c364d0e2b5110e00512ea125559915c Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 8 Sep 2023 16:42:55 -0400 Subject: [PATCH 03/17] Added compartment id environment variable while running in pipeline run. --- ads/model/generic_model.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ads/model/generic_model.py b/ads/model/generic_model.py index 127b8ad1b..7ec241c1d 100644 --- a/ads/model/generic_model.py +++ b/ads/model/generic_model.py @@ -29,6 +29,7 @@ JOB_RUN_OCID, NB_SESSION_COMPARTMENT_OCID, NB_SESSION_OCID, + PIPELINE_RUN_COMPARTMENT_OCID, PROJECT_OCID, ) from ads.evaluations import EvaluatorMixin @@ -91,7 +92,11 @@ from ads.model.transformer.onnx_transformer import ONNXTransformer _TRAINING_RESOURCE_ID = JOB_RUN_OCID or NB_SESSION_OCID -_COMPARTMENT_OCID = NB_SESSION_COMPARTMENT_OCID or JOB_RUN_COMPARTMENT_OCID +_COMPARTMENT_OCID = ( + NB_SESSION_COMPARTMENT_OCID + or JOB_RUN_COMPARTMENT_OCID + or PIPELINE_RUN_COMPARTMENT_OCID +) MODEL_DEPLOYMENT_INSTANCE_SHAPE = "VM.Standard.E4.Flex" MODEL_DEPLOYMENT_INSTANCE_OCPUS = 1 From 6b3fbdf5fb56f51b3cb1fb26ea0ea4bfb7a06cd9 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 11 Sep 2023 16:18:51 -0400 Subject: [PATCH 04/17] Fixed pipeline step to dict. 
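
With this change the ML_JOB-specific runtime settings survive serialization. A
sketch of the expected round trip, reusing the builder calls from the updated
unit test in this patch:

    from ads.pipeline import PipelineStep

    step = (
        PipelineStep("TestUpstreamPipelineStepOne")
        .with_job_id("TestJobIdOne")
        .with_environment_variable(**{"test_key": "test_value"})
        .with_argument("--key val path/to/file")
        .with_maximum_runtime_in_minutes(20)
    )
    spec = step.to_dict()["spec"]
    # The spec now carries the ML_JOB fields:
    #   spec["environmentVariables"]    == {"test_key": "test_value"}
    #   spec["commandLineArguments"]    == "--key val path/to/file"
    #   spec["maximumRuntimeInMinutes"] == 20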
--- ads/pipeline/ads_pipeline_step.py | 13 +++++++++++++ .../default_setup/pipeline/test_pipeline_step.py | 8 ++++++++ 2 files changed, 21 insertions(+) diff --git a/ads/pipeline/ads_pipeline_step.py b/ads/pipeline/ads_pipeline_step.py index 2098274e5..187ecec10 100644 --- a/ads/pipeline/ads_pipeline_step.py +++ b/ads/pipeline/ads_pipeline_step.py @@ -519,6 +519,19 @@ def to_dict(self) -> dict: dict_details["spec"][self.CONST_JOB_ID] = self.job_id if self.description: dict_details["spec"][self.CONST_DESCRIPTION] = self.description + if self.kind == "ML_JOB": + if self.environment_variable: + dict_details["spec"][self.CONST_ENVIRONMENT_VARIABLES] = ( + self.environment_variable + ) + if self.argument: + dict_details["spec"][self.CONST_COMMAND_LINE_ARGUMENTS] = ( + self.argument + ) + if self.maximum_runtime_in_minutes: + dict_details["spec"][self.CONST_MAXIMUM_RUNTIME_IN_MINUTES] = ( + self.maximum_runtime_in_minutes + ) dict_details["spec"].pop(self.CONST_DEPENDS_ON, None) diff --git a/tests/unitary/default_setup/pipeline/test_pipeline_step.py b/tests/unitary/default_setup/pipeline/test_pipeline_step.py index 403e55917..d334bc27b 100644 --- a/tests/unitary/default_setup/pipeline/test_pipeline_step.py +++ b/tests/unitary/default_setup/pipeline/test_pipeline_step.py @@ -38,6 +38,9 @@ class DataSciencePipelineStepBaseTest(unittest.TestCase): PipelineStep("TestUpstreamPipelineStepOne") .with_description("Test upstream pipeline step description one") .with_job_id("TestJobIdOne") + .with_environment_variable(**{"test_key": "test_value"}) + .with_maximum_runtime_in_minutes(20) + .with_argument("--key val path/to/file") ) upstream_pipeline_step_two = ( @@ -85,6 +88,11 @@ def test_pipeline_step_to_dict(self): "name": "TestUpstreamPipelineStepOne", "jobId": "TestJobIdOne", "description": "Test upstream pipeline step description one", + "environmentVariables": { + "test_key": "test_value" + }, + "commandLineArguments": "--key val path/to/file", + "maximumRuntimeInMinutes": 20 }, } From 1367e4cb7f1aa762695fc8ee20aa2d2d46b6ca45 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Mon, 11 Sep 2023 23:14:42 -0400 Subject: [PATCH 05/17] Fixed unit tests. 
--- tests/unitary/default_setup/pipeline/test_pipeline.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/unitary/default_setup/pipeline/test_pipeline.py b/tests/unitary/default_setup/pipeline/test_pipeline.py index 49c8a4348..302dfe065 100644 --- a/tests/unitary/default_setup/pipeline/test_pipeline.py +++ b/tests/unitary/default_setup/pipeline/test_pipeline.py @@ -333,6 +333,11 @@ def test_pipeline_define(self): "name": "TestPipelineStepOne", "jobId": "TestJobIdOne", "description": "Test description one", + "commandLineArguments": "ARGUMENT --KEY VALUE", + "environmentVariables": { + "ENV": "VALUE" + }, + "maximumRuntimeInMinutes": 20 }, }, { @@ -1060,6 +1065,11 @@ def test_pipeline_to_dict(self): "name": "TestPipelineStepOne", "jobId": "TestJobIdOne", "description": "Test description one", + "commandLineArguments": "ARGUMENT --KEY VALUE", + "environmentVariables": { + "ENV": "VALUE" + }, + "maximumRuntimeInMinutes": 20 }, }, { From 80728928727459d2484deec4a813e3d1f5a0dfd8 Mon Sep 17 00:00:00 2001 From: MING KANG Date: Tue, 12 Sep 2023 09:24:17 -0700 Subject: [PATCH 06/17] Fix/`GenericModel.from_model_deployment()` fails to load model back (#322) --- ads/model/deployment/model_deployment.py | 35 +++++++++++++----------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/ads/model/deployment/model_deployment.py b/ads/model/deployment/model_deployment.py index 41a493f09..1b6e1c3d0 100644 --- a/ads/model/deployment/model_deployment.py +++ b/ads/model/deployment/model_deployment.py @@ -1304,7 +1304,8 @@ def from_id(cls, id: str) -> "ModelDeployment": ModelDeployment The ModelDeployment instance (self). """ - return cls()._update_from_oci_model(OCIDataScienceModelDeployment.from_id(id)) + oci_model = OCIDataScienceModelDeployment.from_id(id) + return cls(properties=oci_model)._update_from_oci_model(oci_model) @classmethod def from_dict(cls, obj_dict: Dict) -> "ModelDeployment": @@ -1503,7 +1504,9 @@ def _build_model_deployment_details(self) -> CreateModelDeploymentDetails: **create_model_deployment_details ).to_oci_model(CreateModelDeploymentDetails) - def _update_model_deployment_details(self, **kwargs) -> UpdateModelDeploymentDetails: + def _update_model_deployment_details( + self, **kwargs + ) -> UpdateModelDeploymentDetails: """Builds UpdateModelDeploymentDetails from model deployment instance. Returns @@ -1527,7 +1530,7 @@ def _update_model_deployment_details(self, **kwargs) -> UpdateModelDeploymentDet return OCIDataScienceModelDeployment( **update_model_deployment_details ).to_oci_model(UpdateModelDeploymentDetails) - + def _update_spec(self, **kwargs) -> "ModelDeployment": """Updates model deployment specs from kwargs. @@ -1542,7 +1545,7 @@ def _update_spec(self, **kwargs) -> "ModelDeployment": Model deployment freeform tags defined_tags: (dict) Model deployment defined tags - + Additional kwargs arguments. Can be any attribute that `ads.model.deployment.ModelDeploymentCondaRuntime`, `ads.model.deployment.ModelDeploymentContainerRuntime` and `ads.model.deployment.ModelDeploymentInfrastructure` accepts. 
@@ -1559,12 +1562,12 @@ def _update_spec(self, **kwargs) -> "ModelDeployment": specs = { "self": self._spec, "runtime": self.runtime._spec, - "infrastructure": self.infrastructure._spec + "infrastructure": self.infrastructure._spec, } sub_set = { self.infrastructure.CONST_ACCESS_LOG, self.infrastructure.CONST_PREDICT_LOG, - self.infrastructure.CONST_SHAPE_CONFIG_DETAILS + self.infrastructure.CONST_SHAPE_CONFIG_DETAILS, } for spec_value in specs.values(): for key in spec_value: @@ -1572,7 +1575,9 @@ def _update_spec(self, **kwargs) -> "ModelDeployment": if key in sub_set: for sub_key in converted_specs[key]: converted_sub_key = ads_utils.snake_to_camel(sub_key) - spec_value[key][converted_sub_key] = converted_specs[key][sub_key] + spec_value[key][converted_sub_key] = converted_specs[key][ + sub_key + ] else: spec_value[key] = copy.deepcopy(converted_specs[key]) self = ( @@ -1616,14 +1621,14 @@ def _build_model_deployment_configuration_details(self) -> Dict: infrastructure.CONST_MEMORY_IN_GBS: infrastructure.shape_config_details.get( "memory_in_gbs", None ) - or infrastructure.shape_config_details.get( - "memoryInGBs", None - ) + or infrastructure.shape_config_details.get("memoryInGBs", None) or DEFAULT_MEMORY_IN_GBS, } if infrastructure.subnet_id: - instance_configuration[infrastructure.CONST_SUBNET_ID] = infrastructure.subnet_id + instance_configuration[ + infrastructure.CONST_SUBNET_ID + ] = infrastructure.subnet_id scaling_policy = { infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE", @@ -1638,13 +1643,11 @@ def _build_model_deployment_configuration_details(self) -> Dict: model_id = runtime.model_uri if not model_id.startswith("ocid"): - from ads.model.datascience_model import DataScienceModel - + dsc_model = DataScienceModel( name=self.display_name, - compartment_id=self.infrastructure.compartment_id - or COMPARTMENT_OCID, + compartment_id=self.infrastructure.compartment_id or COMPARTMENT_OCID, project_id=self.infrastructure.project_id or PROJECT_OCID, artifact=runtime.model_uri, ).create( @@ -1653,7 +1656,7 @@ def _build_model_deployment_configuration_details(self) -> Dict: region=runtime.region, overwrite_existing_artifact=runtime.overwrite_existing_artifact, remove_existing_artifact=runtime.remove_existing_artifact, - timeout=runtime.timeout + timeout=runtime.timeout, ) model_id = dsc_model.id From e028cbba5a73b07f81da3bb918cce4d128c9cfa8 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Tue, 12 Sep 2023 16:55:14 -0700 Subject: [PATCH 07/17] Fixes the progress bar issue when exporting artifacts to the Model Catalog. 
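
The export work request can complete before it reports every expected step,
which previously left the progress bar hanging short of 100%. The fix drains
the unreported steps once the request finishes. A minimal sketch of the idiom,
with `tqdm` standing in for ADS's internal `TqdmProgressBar` (an assumption
for illustration; only `update()` is relied on, as in the real helper):

    from tqdm import tqdm

    num_steps = 2  # the export path now expects two steps instead of three
    with tqdm(total=num_steps) as progress:
        i = 0
        # ... poll the work request, calling progress.update() and i += 1
        # for each step the service reports ...
        # Drain whatever was not reported so the bar reaches its final state.
        while i < num_steps:
            progress.update()
            i += 1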
--- ads/model/service/oci_datascience_model.py | 14 +++++++++----- .../user_guide/model_catalog/model_catalog.rst | 3 +-- .../model_registration/large_model_artifact.rst | 2 +- .../user_guide/model_registration/model_load.rst | 4 ++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/ads/model/service/oci_datascience_model.py b/ads/model/service/oci_datascience_model.py index 865bbf108..ec5c3e4ce 100644 --- a/ads/model/service/oci_datascience_model.py +++ b/ads/model/service/oci_datascience_model.py @@ -38,19 +38,19 @@ ) -class ModelProvenanceNotFoundError(Exception): # pragma: no cover +class ModelProvenanceNotFoundError(Exception): # pragma: no cover pass -class ModelArtifactNotFoundError(Exception): # pragma: no cover +class ModelArtifactNotFoundError(Exception): # pragma: no cover pass -class ModelNotSavedError(Exception): # pragma: no cover +class ModelNotSavedError(Exception): # pragma: no cover pass -class ModelWithActiveDeploymentError(Exception): # pragma: no cover +class ModelWithActiveDeploymentError(Exception): # pragma: no cover pass @@ -410,7 +410,7 @@ def export_model_artifact(self, bucket_uri: str, region: str = None): # Show progress of exporting model artifacts self._wait_for_work_request( work_request_id=work_request_id, - num_steps=3, + num_steps=2, ) @check_for_model_id( @@ -596,3 +596,7 @@ def _wait_for_work_request(self, work_request_id: str, num_steps: int = 3) -> No ) else: break + + while i < num_steps: + progress.update() + i += 1 diff --git a/docs/source/user_guide/model_catalog/model_catalog.rst b/docs/source/user_guide/model_catalog/model_catalog.rst index a533c8749..aaebaa87c 100644 --- a/docs/source/user_guide/model_catalog/model_catalog.rst +++ b/docs/source/user_guide/model_catalog/model_catalog.rst @@ -1204,7 +1204,7 @@ If you don't have an Object Storage bucket, create one using the OCI SDK or the Allow service datascience to manage object-family in compartment where ALL {target.bucket.name=''} - Allow service objectstorage to manage object-family in compartment where ALL {target.bucket.name=''} + Allow service objectstorage- to manage object-family in compartment where ALL {target.bucket.name=''} Saving ====== @@ -1545,4 +1545,3 @@ In the next example, the model that was stored in the model catalog as part of t .. code-block:: python3 mc.delete_model(mc_model.id) - diff --git a/docs/source/user_guide/model_registration/large_model_artifact.rst b/docs/source/user_guide/model_registration/large_model_artifact.rst index 65a5e5b1f..711a17e66 100644 --- a/docs/source/user_guide/model_registration/large_model_artifact.rst +++ b/docs/source/user_guide/model_registration/large_model_artifact.rst @@ -13,7 +13,7 @@ If you don't have an Object Storage bucket, create one using the OCI SDK or the Allow service datascience to manage object-family in compartment where ALL {target.bucket.name=''} - Allow service objectstorage to manage object-family in compartment where ALL {target.bucket.name=''} + Allow service objectstorage- to manage object-family in compartment where ALL {target.bucket.name=''} See `API documentation <../../ads.model.html#id10>`__ for more details. 
diff --git a/docs/source/user_guide/model_registration/model_load.rst b/docs/source/user_guide/model_registration/model_load.rst index c063cba28..44fb3876e 100644 --- a/docs/source/user_guide/model_registration/model_load.rst +++ b/docs/source/user_guide/model_registration/model_load.rst @@ -119,7 +119,7 @@ If you don't have an Object Storage bucket, create one using the OCI SDK or the Allow service datascience to manage object-family in compartment where ALL {target.bucket.name=''} - Allow service objectstorage to manage object-family in compartment where ALL {target.bucket.name=''} + Allow service objectstorage- to manage object-family in compartment where ALL {target.bucket.name=''} The following example loads a model using the large model artifact approach. The ``bucket_uri`` has the following syntax: ``oci://@//`` See `API documentation <../../ads.model.html#id4>`__ for more details. @@ -169,4 +169,4 @@ Alternatively the ``.from_id()`` method can be used to load registered or deploy bucket_uri=@/prefix/>, force_overwrite=True, remove_existing_artifact=True, - ) \ No newline at end of file + ) From e22c58e1c4781e8429538b329138473da9b67dd5 Mon Sep 17 00:00:00 2001 From: Dmitrii Cherkasov Date: Wed, 13 Sep 2023 10:35:00 -0700 Subject: [PATCH 08/17] Fixes unit tests. --- .../default_setup/model/test_oci_datascience_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/unitary/default_setup/model/test_oci_datascience_model.py b/tests/unitary/default_setup/model/test_oci_datascience_model.py index 29ab8afd4..43148cc23 100644 --- a/tests/unitary/default_setup/model/test_oci_datascience_model.py +++ b/tests/unitary/default_setup/model/test_oci_datascience_model.py @@ -88,7 +88,6 @@ class TestOCIDataScienceModel: def setup_class(cls): - # Mock delete model response cls.mock_delete_model_response = Response( data=None, status=None, headers=None, request=None @@ -229,7 +228,9 @@ def test_delete_success(self, mock_client): mock_model_deployment.return_value = [ MagicMock(lifecycle_state="ACTIVE", identifier="md_id") ] - with patch("ads.model.deployment.ModelDeployment.from_id") as mock_from_id: + with patch( + "ads.model.deployment.ModelDeployment.from_id" + ) as mock_from_id: with patch.object(OCIDataScienceModel, "sync") as mock_sync: self.mock_model.delete(delete_associated_model_deployment=True) mock_from_id.assert_called_with("md_id") @@ -445,7 +446,7 @@ def test_export_model_artifact( ) mock_wait_for_work_request.assert_called_with( work_request_id="work_request_id", - num_steps=3, + num_steps=2, ) @patch.object(TqdmProgressBar, "update") From edca7d88a5eda822ed16fcef55d53c6ee2c0e360 Mon Sep 17 00:00:00 2001 From: John DeSanto <202220+jdesanto@users.noreply.github.com> Date: Wed, 20 Sep 2023 00:40:04 -0700 Subject: [PATCH 09/17] Update README-development.md (#324) --- README-development.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README-development.md b/README-development.md index dc1afe35c..d09515bef 100644 --- a/README-development.md +++ b/README-development.md @@ -58,7 +58,7 @@ Open the destination folder where you want to clone ADS library, and install dep python3 -m pip install -e . 
``` -To which packages were installed and their version numbers, run: +To view which packages were installed and their version numbers, run: ```bash python3 -m pip freeze From 95fe7fae59fb54f127358c2bf233d0e954b71397 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 20 Sep 2023 18:08:55 -0400 Subject: [PATCH 10/17] Fixed unpatched object storage url bug in unit test. --- .../model_deployment_infrastructure.py | 1 + .../with_extras/model/test_artifact.py | 15 +++++- .../with_extras/model/test_env_info.py | 22 +++++++- .../with_extras/model/test_generic_model.py | 51 ++++++++++++++++--- .../model/test_model_deployment_details.py | 14 ++++- .../model/test_model_metadata_mixin.py | 29 ++++++++++- .../model/test_model_provenance_details.py | 16 +++++- .../with_extras/model/test_runtime_info.py | 42 +++++++++++++-- 8 files changed, 172 insertions(+), 18 deletions(-) diff --git a/ads/model/deployment/model_deployment_infrastructure.py b/ads/model/deployment/model_deployment_infrastructure.py index 519b5a62f..0bce46cf8 100644 --- a/ads/model/deployment/model_deployment_infrastructure.py +++ b/ads/model/deployment/model_deployment_infrastructure.py @@ -223,6 +223,7 @@ def _load_default_properties(self) -> Dict: defaults[self.CONST_REPLICA] = DEFAULT_REPLICA if NB_SESSION_OCID: + nb_session = None try: nb_session = DSCNotebookSession.from_ocid(NB_SESSION_OCID) except Exception as e: diff --git a/tests/unitary/with_extras/model/test_artifact.py b/tests/unitary/with_extras/model/test_artifact.py index e55f7c36c..e8f824894 100644 --- a/tests/unitary/with_extras/model/test_artifact.py +++ b/tests/unitary/with_extras/model/test_artifact.py @@ -138,10 +138,23 @@ def test_prepare_runtime_yaml_inference_training( (TrainingEnvInfo, mlcpu_path_cust, None, None, training_info_cust), ], ) + @patch("ads.model.runtime.env_info.get_service_packs") def test__populate_env_info_inference( - self, env_info_class, conda_pack, bucketname, namespace, expected_env_info + self, mock_get_service_packs, env_info_class, conda_pack, bucketname, namespace, expected_env_info ): """test _populate_env_info.""" + env_path = ( + expected_env_info.inference_env_path if isinstance(expected_env_info, InferenceEnvInfo) + else expected_env_info.training_env_path + ) + mock_get_service_packs.return_value = ( + { + env_path : ("mlcpuv1", "3.6") + }, + { + "mlcpuv1" : (env_path, "3.6") + } + ) env_info = self.artifact._populate_env_info( env_info_class, conda_pack=conda_pack, diff --git a/tests/unitary/with_extras/model/test_env_info.py b/tests/unitary/with_extras/model/test_env_info.py index 8f3ac8ade..cf555f68c 100644 --- a/tests/unitary/with_extras/model/test_env_info.py +++ b/tests/unitary/with_extras/model/test_env_info.py @@ -177,7 +177,16 @@ def test_from_slug_prod_sp(self): info.training_env_type = "service_pack" info.training_python_version = "3.6" - def test_from_slug_not_exist(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_slug_not_exist(self, mock_get_service_packs): + mock_get_service_packs.return_value = ( + { + "test_path" : ("mlcpuv1", "3.6"), + }, + { + "mlcpuv1" : ("test_path", "3.6"), + } + ) with pytest.warns(UserWarning, match="not a service pack"): TrainingEnvInfo.from_slug( "not_exist", namespace="ociodscdev", bucketname="service-conda-packs" @@ -256,7 +265,16 @@ def test_from_slug_prod_sp(self): info.inference_env_type = "service_pack" info.inference_python_version = "3.6" - def test_from_slug_not_exist(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def 
test_from_slug_not_exist(self, mock_get_service_packs): + mock_get_service_packs.return_value = ( + { + "test_path" : ("mlcpuv1", "3.6"), + }, + { + "mlcpuv1" : ("test_path", "3.6"), + } + ) with pytest.warns(UserWarning, match="not a service pack"): InferenceEnvInfo.from_slug( "not_exist", namespace="ociodscdev", bucketname="service-conda-packs" diff --git a/tests/unitary/with_extras/model/test_generic_model.py b/tests/unitary/with_extras/model/test_generic_model.py index 7773b8a84..527a2528c 100644 --- a/tests/unitary/with_extras/model/test_generic_model.py +++ b/tests/unitary/with_extras/model/test_generic_model.py @@ -279,14 +279,29 @@ def test_prepare_fail(self, mock_handle_model_file_name): "oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" ) + @patch("ads.model.runtime.env_info.get_service_packs") @patch("ads.common.auth.default_signer") - def test_prepare_both_conda_env(self, mock_signer): + def test_prepare_both_conda_env(self, mock_signer, mock_get_service_packs): """prepare a model by only providing inference conda env.""" + inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + inference_python_version="3.6" + training_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Oracle_Database_for_CPU_Python_3.7/1.0/database_p37_cpu_v1" + training_python_version="3.7" + mock_get_service_packs.return_value = ( + { + inference_conda_env : ("mlcpuv1", inference_python_version), + training_conda_env : ("database_p37_cpu_v1", training_python_version) + }, + { + "mlcpuv1" : (inference_conda_env, inference_python_version), + "database_p37_cpu_v1" : (training_conda_env, training_python_version) + } + ) self.generic_model.prepare( - inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1", - inference_python_version="3.6", - training_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Oracle_Database_for_CPU_Python_3.7/1.0/database_p37_cpu_v1", - training_python_version="3.7", + inference_conda_env=inference_conda_env, + inference_python_version=inference_python_version, + training_conda_env=training_conda_env, + training_python_version=training_python_version, model_file_name="fake_model_name", force_overwrite=True, ) @@ -349,8 +364,19 @@ def test_reload(self): @patch.object(GenericModel, "_random_display_name", return_value="test_name") @patch.object(DataScienceModel, "create") - def test_save(self, mock_dsc_model_create, mock__random_display_name): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_save(self, mock_get_service_packs, mock_dsc_model_create, mock__random_display_name): """test saving a model to artifact.""" + inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" + inference_python_version="3.7" + mock_get_service_packs.return_value = ( + { + inference_conda_env : ("dataexpl_p37_cpu_v3", inference_python_version), + }, + { + "dataexpl_p37_cpu_v3" : (inference_conda_env, inference_python_version), + } + ) mock_dsc_model_create.return_value = MagicMock(id="fake_id") self.generic_model.prepare( inference_conda_env="dataexpl_p37_cpu_v3", @@ -371,8 +397,19 @@ def test_save(self, mock_dsc_model_create, mock__random_display_name): parallel_process_count=utils.DEFAULT_PARALLEL_PROCESS_COUNT, ) - def test_save_not_implemented_error(self): + 
@patch("ads.model.runtime.env_info.get_service_packs") + def test_save_not_implemented_error(self, mock_get_service_packs): """test saving a model to artifact.""" + inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" + inference_python_version="3.7" + mock_get_service_packs.return_value = ( + { + inference_conda_env : ("dataexpl_p37_cpu_v3", inference_python_version), + }, + { + "dataexpl_p37_cpu_v3" : (inference_conda_env, inference_python_version), + } + ) self.generic_model._serialize = False self.generic_model.prepare( inference_conda_env="dataexpl_p37_cpu_v3", diff --git a/tests/unitary/with_extras/model/test_model_deployment_details.py b/tests/unitary/with_extras/model/test_model_deployment_details.py index e6122cb3f..eeb2a9de4 100644 --- a/tests/unitary/with_extras/model/test_model_deployment_details.py +++ b/tests/unitary/with_extras/model/test_model_deployment_details.py @@ -5,6 +5,7 @@ import os from unittest import TestCase +from unittest.mock import patch import yaml from ads.model.runtime.model_deployment_details import ModelDeploymentDetails @@ -27,7 +28,18 @@ def setUpClass(cls): with open(os.path.join(curr_dir, "runtime_fail.yaml"), encoding="utf-8") as rt: cls.runtime_dict_fail = yaml.load(rt, loader) - def test_from_dict(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_dict(self, mock_get_service_packs): + inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + inference_python_version="3.6" + mock_get_service_packs.return_value = ( + { + inference_conda_env : ("mlcpuv1", inference_python_version), + }, + { + "mlcpuv1" : (inference_conda_env, inference_python_version), + } + ) model_deployment = ModelDeploymentDetails.from_dict( self.runtime_dict["MODEL_DEPLOYMENT"] ) diff --git a/tests/unitary/with_extras/model/test_model_metadata_mixin.py b/tests/unitary/with_extras/model/test_model_metadata_mixin.py index 520aab631..1cdfb162b 100644 --- a/tests/unitary/with_extras/model/test_model_metadata_mixin.py +++ b/tests/unitary/with_extras/model/test_model_metadata_mixin.py @@ -3,6 +3,7 @@ # Copyright (c) 2022, 2023 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from unittest.mock import patch import numpy as np import pytest from sklearn import datasets, linear_model @@ -104,7 +105,19 @@ def test_metadata_generic_model_container_runtime(self): ) assert model.metadata_provenance.training_id is None - def test_metadata_sklearn_model(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_metadata_sklearn_model(self, mock_get_service_packs): + conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" + python_version="3.7" + mock_get_service_packs.return_value = ( + { + conda_env : ("dataexpl_p37_cpu_v3", python_version), + }, + { + "dataexpl_p37_cpu_v3" : (conda_env, python_version), + + } + ) model = SklearnModel(self.rgr, artifact_dir="./test_sklearn") model.prepare( inference_conda_env="dataexpl_p37_cpu_v3", @@ -146,7 +159,19 @@ def test_metadata_sklearn_model(self): ) assert model.metadata_provenance.training_id is None - def test_metadata_xgboost_model(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_metadata_xgboost_model(self, mock_get_service_packs): + conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" + python_version="3.7" + mock_get_service_packs.return_value = ( + { + conda_env : ("dataexpl_p37_cpu_v3", python_version), + }, + { + "dataexpl_p37_cpu_v3" : (conda_env, python_version), + + } + ) model = XGBoostModel(self.xgb_rgr, artifact_dir="./test_xgboost") model.prepare( inference_conda_env="dataexpl_p37_cpu_v3", diff --git a/tests/unitary/with_extras/model/test_model_provenance_details.py b/tests/unitary/with_extras/model/test_model_provenance_details.py index 299443410..499be7c19 100644 --- a/tests/unitary/with_extras/model/test_model_provenance_details.py +++ b/tests/unitary/with_extras/model/test_model_provenance_details.py @@ -4,6 +4,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import os +from unittest.mock import patch import yaml from ads.model.runtime.model_provenance_details import ( @@ -57,8 +58,19 @@ def setup_class(cls): with open(os.path.join(curr_dir, "runtime_fail.yaml"), encoding="utf-8") as rt: cls.runtime_dict_fail = yaml.load(rt, loader) - def test_from_dict(self): - + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_dict(self, mock_get_service_packs): + conda_env="oci://service_conda_packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + python_version="3.6" + mock_get_service_packs.return_value = ( + { + conda_env : ("mlcpuv1", python_version), + }, + { + "mlcpuv1" : (conda_env, python_version), + + } + ) model_provenance = ModelProvenanceDetails.from_dict( self.runtime_dict["MODEL_PROVENANCE"] ) diff --git a/tests/unitary/with_extras/model/test_runtime_info.py b/tests/unitary/with_extras/model/test_runtime_info.py index c4da7f6be..2297a1f2b 100644 --- a/tests/unitary/with_extras/model/test_runtime_info.py +++ b/tests/unitary/with_extras/model/test_runtime_info.py @@ -36,7 +36,19 @@ def test__validate_dict_fail(self): with pytest.raises(AssertionError): RuntimeInfo._validate_dict(self.runtime_dict_fail) - def test_from_yaml(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_yaml(self, mock_get_service_packs): + 
conda_env="oci://service_conda_packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + python_version="3.6" + mock_get_service_packs.return_value = ( + { + conda_env : ("mlcpuv1", python_version), + }, + { + "mlcpuv1" : (conda_env, python_version), + + } + ) runtime_info = RuntimeInfo.from_yaml( uri=os.path.join(self.curr_dir, "runtime.yaml") ) @@ -95,7 +107,19 @@ def test_from_yaml(self): @patch.object(InferenceEnvInfo, "_validate", side_effect=DocumentError) @patch.object(TrainingEnvInfo, "_validate", side_effect=DocumentError) - def test_from_yaml_fail(self, mock_inference, mock_training): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_yaml_fail(self, mock_get_service_packs, mock_inference, mock_training): + conda_env="oci://service_conda_packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + python_version="3.6" + mock_get_service_packs.return_value = ( + { + conda_env : ("mlcpuv1", python_version), + }, + { + "mlcpuv1" : (conda_env, python_version), + + } + ) with pytest.raises(DocumentError): RuntimeInfo.from_yaml(uri=os.path.join(self.curr_dir, "runtime_fail.yaml")) @@ -119,7 +143,19 @@ def test_from_yaml_wrong_format(self, mock_provenance, mock_deployment): with pytest.raises(FileNotFoundError): RuntimeInfo.from_yaml(uri=os.path.join(self.curr_dir, "fake.yaml")) - def test_from_and_to_yaml_file(self): + @patch("ads.model.runtime.env_info.get_service_packs") + def test_from_and_to_yaml_file(self, mock_get_service_packs): + conda_env="oci://service_conda_packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1" + python_version="3.6" + mock_get_service_packs.return_value = ( + { + conda_env : ("mlcpuv1", python_version), + }, + { + "mlcpuv1" : (conda_env, python_version), + + } + ) runtime = RuntimeInfo.from_yaml(uri=os.path.join(self.curr_dir, "runtime.yaml")) runtime.to_yaml(uri=os.path.join(self.curr_dir, "runtime_copy.yaml")) runtime_copy = RuntimeInfo.from_yaml( From 06a37f05048139cab0389078ab010c7432a40b10 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 20 Sep 2023 18:38:05 -0400 Subject: [PATCH 11/17] Updated pr. 
--- tests/unitary/with_extras/model/test_generic_model.py | 2 +- tests/unitary/with_extras/model/test_model_metadata_mixin.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unitary/with_extras/model/test_generic_model.py b/tests/unitary/with_extras/model/test_generic_model.py index 527a2528c..afe1f9f8a 100644 --- a/tests/unitary/with_extras/model/test_generic_model.py +++ b/tests/unitary/with_extras/model/test_generic_model.py @@ -386,7 +386,7 @@ def test_save(self, mock_get_service_packs, mock_dsc_model_create, mock__random_ force_overwrite=True, training_id=None, ) - self.generic_model.save() + self.generic_model.save(ignore_introspection=True) assert self.generic_model.model_id is not None and isinstance( self.generic_model.model_id, str ) diff --git a/tests/unitary/with_extras/model/test_model_metadata_mixin.py b/tests/unitary/with_extras/model/test_model_metadata_mixin.py index 1cdfb162b..ad9b41f7e 100644 --- a/tests/unitary/with_extras/model/test_model_metadata_mixin.py +++ b/tests/unitary/with_extras/model/test_model_metadata_mixin.py @@ -217,7 +217,7 @@ def test_metadata_xgboost_model(self, mock_get_service_packs): assert model.metadata_provenance.training_id is None assert ( model.runtime_info.model_deployment.inference_conda_env.inference_env_path - == "oci://service-conda-packs@id19sfcrra6z/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" + == "oci://service-conda-packs@ociodscdev/service_pack/cpu/Data_Exploration_and_Manipulation_for_CPU_Python_3.7/3.0/dataexpl_p37_cpu_v3" ) def teardown_method(self): From 24b326251b4b6f78c868ad591e54c9073faa39b4 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Wed, 20 Sep 2023 18:40:58 -0400 Subject: [PATCH 12/17] Updated pr. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 28560e6e0..045599f60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ dependencies = [ "asteval>=0.9.25", "cerberus>=1.3.4", "cloudpickle>=1.6.0", - "fsspec>=0.8.7", + "fsspec>=0.8.7,<2023.9.1", # v2.9.1 introduced issues, releved by unit tests "gitpython>=3.1.2", "jinja2>=2.11.2", "matplotlib>=3.1.3", From c174d72b6b9243f0c20b4dd51bf2e2c83df922f1 Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Thu, 21 Sep 2023 12:20:31 -0400 Subject: [PATCH 13/17] Fixed issue caused by https://github.com/microsoft/onnxruntime/issues/17631. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 045599f60..f5700f404 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,7 @@ onnx = [ "lightgbm==3.3.1", "onnx>=1.12.0", "onnxmltools>=1.10.0", - "onnxruntime>=1.10.0", + "onnxruntime>=1.10.0,<1.16", # v1.16 introduced issues https://github.com/microsoft/onnxruntime/issues/17631, releved by unit tests "oracle_ads[viz]", "protobuf<=3.20", "skl2onnx>=1.10.4", From f1c53908531cc5aabab7467677fd705e5f8a4e2e Mon Sep 17 00:00:00 2001 From: John DeSanto <202220+jdesanto@users.noreply.github.com> Date: Thu, 21 Sep 2023 14:59:18 -0700 Subject: [PATCH 14/17] Moved geopandas file load out of constructor (#337) --- ads/dataset/sampled_dataset.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ads/dataset/sampled_dataset.py b/ads/dataset/sampled_dataset.py index 665957ebd..8d53715c0 100644 --- a/ads/dataset/sampled_dataset.py +++ b/ads/dataset/sampled_dataset.py @@ -47,13 +47,13 @@ OptionalDependency, ) +NATURAL_EARTH_DATASET = "naturalearth_lowres" class PandasDataset(object): """ This class provides APIs that can work on a sampled dataset. """ - @runtime_dependency(module="geopandas", install_from=OptionalDependency.GEO) def __init__( self, sampled_df, @@ -67,9 +67,7 @@ def __init__( self.correlation = None self.feature_dist_html_dict = {} self.feature_types = metadata if metadata is not None else {} - self.world = geopandas.read_file( - geopandas.datasets.get_path("naturalearth_lowres") - ) + self.world = None self.numeric_columns = self.sampled_df.select_dtypes( utils.numeric_pandas_dtypes() @@ -562,7 +560,7 @@ def plot_gis_scatter(self, lon="longitude", lat="latitude", ax=None): ), ) world = geopandas.read_file( - geopandas.datasets.get_path("naturalearth_lowres") + geopandas.datasets.get_path(NATURAL_EARTH_DATASET) ) ax1 = world.plot(ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white") gdf.plot(ax=ax1, color="blue", markersize=10) @@ -706,6 +704,12 @@ def _visualize_feature_distribution(self, html_widget): gdf = geopandas.GeoDataFrame( df, geometry=geopandas.points_from_xy(df["lon"], df["lat"]) ) + + if not self.world: + self.world = geopandas.read_file( + geopandas.datasets.get_path(NATURAL_EARTH_DATASET) + ) + self.world.plot( ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white" ) From d72098c76908a56d16a6800926513890b9f161fe Mon Sep 17 00:00:00 2001 From: Lu Peng Date: Fri, 22 Sep 2023 23:15:59 -0400 Subject: [PATCH 15/17] Fixed generic model save() issue. 
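
Besides updating the pre-authenticated request (PAR) URL for the service conda
pack index, artifact introspection no longer hard-fails when that index cannot
be reached (for example, without internet access); it records a warning for
the `runtime_path_exist` check instead. A simplified sketch of the fallback,
with a minimal stand-in for the module-level `TESTS` result dict from
`model_artifact_validate.py`:

    # Stand-in for the introspection result registry (assumption: only the
    # "runtime_path_exist" entry is shown here).
    TESTS = {"runtime_path_exist": {"success": None, "error_msg": ""}}

    # Lookup failed: keep validating, but surface a warning in the report
    # instead of returning a hard failure.
    TESTS["runtime_path_exist"]["success"] = False
    TESTS["runtime_path_exist"]["error_msg"] = (
        "WARNING: Unable to validate if INFERENCE_ENV_PATH exists."
        " Please check if you have internet access."
    )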
--- .../artifact_introspection_test/model_artifact_validate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py b/ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py index 070bd6911..989633bb6 100644 --- a/ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +++ b/ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py @@ -30,7 +30,7 @@ HTML_PATH = os.path.join(_cwd, "resources", "template.html") CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml") PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$" -PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/Ri7zFc_h91sxMdgnza9Qnqw3Ina8hf8wzDvEpAnUXMDOnUR1U1fpsaBUjUfgPgIq/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json" +PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json" TESTS = { "score_py": { @@ -195,7 +195,9 @@ def check_runtime_yml(file_path) -> Tuple[bool, str]: return False, TESTS["runtime_path_exist"]["error_msg"] else: TESTS["runtime_path_exist"]["success"] = False - return False, TESTS["runtime_path_exist"]["error_msg"] + TESTS["runtime_path_exist"][ + "error_msg" + ] = "WARNING: Unable to validate if INFERENCE_ENV_PATH exists. Please check if you have internet access." else: bucket_name = env_path.username namespace = env_path.hostname From da6eb53c09ace59538ac0abe63ba7b5dc9325a54 Mon Sep 17 00:00:00 2001 From: MING KANG Date: Wed, 27 Sep 2023 10:02:53 -0700 Subject: [PATCH 16/17] ODSC-46642/pass model by reference (#342) --- ads/model/artifact_uploader.py | 116 +++++++++++------- ads/model/datascience_model.py | 12 ++ .../model/test_artifact_uploader.py | 102 ++++++++++++--- 3 files changed, 166 insertions(+), 64 deletions(-) diff --git a/ads/model/artifact_uploader.py b/ads/model/artifact_uploader.py index 260761d34..92bc590bd 100644 --- a/ads/model/artifact_uploader.py +++ b/ads/model/artifact_uploader.py @@ -10,6 +10,7 @@ from typing import Dict, Optional from ads.common import utils +from ads.common.object_storage_details import ObjectStorageDetails from ads.model.common import utils as model_utils from ads.model.service.oci_datascience_model import OCIDataScienceModel @@ -29,7 +30,10 @@ def __init__(self, dsc_model: OCIDataScienceModel, artifact_path: str): artifact_path: str The model artifact location. """ - if not os.path.exists(artifact_path): + if not ( + ObjectStorageDetails.is_oci_path(artifact_path) + or os.path.exists(artifact_path) + ): raise ValueError(f"The `{artifact_path}` does not exist") self.dsc_model = dsc_model @@ -45,7 +49,7 @@ def upload(self): ) as progress: self.progress = progress self.progress.update("Preparing model artifacts ZIP archive.") - self._prepare_artiact_tmp_zip() + self._prepare_artifact_tmp_zip() self.progress.update("Uploading model artifacts.") self._upload() self.progress.update( @@ -55,22 +59,19 @@ def upload(self): except Exception: raise finally: - self._remove_artiact_tmp_zip() + self._remove_artifact_tmp_zip() - def _prepare_artiact_tmp_zip(self) -> str: + def _prepare_artifact_tmp_zip(self) -> str: """Prepares model artifacts ZIP archive. - Parameters - ---------- - progress: (TqdmProgressBar, optional). Defaults to `None`. - The progress indicator. 
- Returns ------- str Path to the model artifact ZIP archive. """ - if os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith( + if ObjectStorageDetails.is_oci_path(self.artifact_path): + self.artifact_zip_path = self.artifact_path + elif os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith( ".zip" ): self.artifact_zip_path = self.artifact_path @@ -80,7 +81,7 @@ def _prepare_artiact_tmp_zip(self) -> str: ) return self.artifact_zip_path - def _remove_artiact_tmp_zip(self): + def _remove_artifact_tmp_zip(self): """Removes temporary created artifact zip archive.""" if ( self.artifact_zip_path @@ -112,7 +113,10 @@ class LargeArtifactUploader(ArtifactUploader): Attributes ---------- artifact_path: str - The model artifact location. + The model artifact location. Possible values are: + - object storage path to zip archive. Example: `oci://@/prefix/mymodel.zip`. + - local path to zip archive. Example: `./mymodel.zip`. + - local path to folder with artifacts. Example: `./mymodel`. artifact_zip_path: str The uri of the zip of model artifact. auth: dict @@ -124,6 +128,8 @@ class LargeArtifactUploader(ArtifactUploader): The OCI Object Storage URI where model artifacts will be copied to. The `bucket_uri` is only necessary for uploading large artifacts which size is greater than 2GB. Example: `oci://@/prefix/`. + .. versionadded:: 2.8.10 + If artifact_path is object storage path to a zip archive, bucket_uri will be ignored. dsc_model: OCIDataScienceModel The data scince model instance. overwrite_existing_artifact: bool @@ -145,7 +151,7 @@ def __init__( self, dsc_model: OCIDataScienceModel, artifact_path: str, - bucket_uri: str, + bucket_uri: str = None, auth: Optional[Dict] = None, region: Optional[str] = None, overwrite_existing_artifact: Optional[bool] = True, @@ -159,11 +165,16 @@ def __init__( dsc_model: OCIDataScienceModel The data scince model instance. artifact_path: str - The model artifact location. - bucket_uri: str + The model artifact location. Possible values are: + - object storage path to zip archive. Example: `oci://@/prefix/mymodel.zip`. + - local path to zip archive. Example: `./mymodel.zip`. + - local path to folder with artifacts. Example: `./mymodel`. + bucket_uri: (str, optional). Defaults to `None`. The OCI Object Storage URI where model artifacts will be copied to. - The `bucket_uri` is only necessary for uploading large artifacts which + The `bucket_uri` is only necessary for uploading large artifacts from local which size is greater than 2GB. Example: `oci://@/prefix/`. + .. versionadded:: 2.8.10 + If `artifact_path` is object storage path to a zip archive, `bucket_uri` will be ignored. auth: (Dict, optional). Defaults to `None`. The default authetication is set using `ads.set_auth` API. If you need to override the default, use the `ads.common.auth.api_keys` or @@ -179,11 +190,22 @@ def __init__( parallel_process_count: (int, optional). The number of worker processes to use in parallel for uploading individual parts of a multipart upload. """ + self.auth = auth or dsc_model.auth + if ObjectStorageDetails.is_oci_path(artifact_path): + if not artifact_path.endswith(".zip"): + raise ValueError( + f"The `artifact_path={artifact_path}` is invalid." + "The remote path for model artifact should be a zip archive, " + "e.g. `oci://@/prefix/mymodel.zip`." 
+ ) + if not utils.is_path_exists(uri=artifact_path, auth=self.auth): + raise ValueError(f"The `{artifact_path}` does not exist.") + bucket_uri = artifact_path + if not bucket_uri: raise ValueError("The `bucket_uri` must be provided.") super().__init__(dsc_model=dsc_model, artifact_path=artifact_path) - self.auth = auth or dsc_model.auth self.region = region or utils.extract_region(self.auth) self.bucket_uri = bucket_uri self.overwrite_existing_artifact = overwrite_existing_artifact @@ -192,38 +214,38 @@ def __init__( def _upload(self): """Uploads model artifacts to the model catalog.""" - self.progress.update("Copying model artifact to the Object Storage bucket") - bucket_uri = self.bucket_uri - bucket_uri_file_name = os.path.basename(bucket_uri) - - if not bucket_uri_file_name: - bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip") - elif not bucket_uri.lower().endswith(".zip"): - bucket_uri = f"{bucket_uri}.zip" - - if not self.overwrite_existing_artifact and utils.is_path_exists( - uri=bucket_uri, auth=self.auth - ): - raise FileExistsError( - f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or " - "set `overwrite_existing_artifact` to `True` if you wish to overwrite." - ) + self.progress.update("Copying model artifact to the Object Storage bucket") + if not bucket_uri == self.artifact_zip_path: + bucket_uri_file_name = os.path.basename(bucket_uri) + + if not bucket_uri_file_name: + bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip") + elif not bucket_uri.lower().endswith(".zip"): + bucket_uri = f"{bucket_uri}.zip" + + if not self.overwrite_existing_artifact and utils.is_path_exists( + uri=bucket_uri, auth=self.auth + ): + raise FileExistsError( + f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or " + "set `overwrite_existing_artifact` to `True` if you wish to overwrite." + ) - try: - utils.upload_to_os( - src_uri=self.artifact_zip_path, - dst_uri=bucket_uri, - auth=self.auth, - parallel_process_count=self._parallel_process_count, - force_overwrite=self.overwrite_existing_artifact, - progressbar_description="Copying model artifact to the Object Storage bucket.", - ) - except Exception as ex: - raise RuntimeError( - f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`." - f"See Exception: {ex}" - ) + try: + utils.upload_to_os( + src_uri=self.artifact_zip_path, + dst_uri=bucket_uri, + auth=self.auth, + parallel_process_count=self._parallel_process_count, + force_overwrite=self.overwrite_existing_artifact, + progressbar_description="Copying model artifact to the Object Storage bucket.", + ) + except Exception as ex: + raise RuntimeError( + f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`." 
+ f"See Exception: {ex}" + ) self.progress.update("Exporting model artifact to the model catalog") self.dsc_model.export_model_artifact(bucket_uri=bucket_uri, region=self.region) diff --git a/ads/model/datascience_model.py b/ads/model/datascience_model.py index 8bbf6d0da..44c8f6708 100644 --- a/ads/model/datascience_model.py +++ b/ads/model/datascience_model.py @@ -11,6 +11,7 @@ import pandas from ads.common import utils +from ads.common.object_storage_details import ObjectStorageDetails from ads.config import COMPARTMENT_OCID, PROJECT_OCID from ads.feature_engineering.schema import Schema from ads.jobs.builders.base import Builder @@ -548,6 +549,8 @@ def create(self, **kwargs) -> "DataScienceModel": The OCI Object Storage URI where model artifacts will be copied to. The `bucket_uri` is only necessary for uploading large artifacts which size is greater than 2GB. Example: `oci://@/prefix/`. + .. versionadded:: 2.8.10 + If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored. overwrite_existing_artifact: (bool, optional). Defaults to `True`. Overwrite target bucket artifact if exists. remove_existing_artifact: (bool, optional). Defaults to `True`. @@ -636,6 +639,8 @@ def upload_artifact( The OCI Object Storage URI where model artifacts will be copied to. The `bucket_uri` is only necessary for uploading large artifacts which size is greater than 2GB. Example: `oci://@/prefix/`. + .. versionadded:: 2.8.10 + If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored. auth: (Dict, optional). Defaults to `None`. The default authentication is set using `ads.set_auth` API. If you need to override the default, use the `ads.common.auth.api_keys` or @@ -668,6 +673,13 @@ def upload_artifact( "timeout": timeout, } + if ObjectStorageDetails.is_oci_path(self.artifact): + if bucket_uri and bucket_uri != self.artifact: + logger.warn( + "The `bucket_uri` will be ignored and the value of `self.artifact` will be used instead." 
+ ) + bucket_uri = self.artifact + if bucket_uri or utils.folder_size(self.artifact) > _MAX_ARTIFACT_SIZE_IN_BYTES: if not bucket_uri: raise ModelArtifactSizeError( diff --git a/tests/unitary/default_setup/model/test_artifact_uploader.py b/tests/unitary/default_setup/model/test_artifact_uploader.py index bc9daeabf..ef9372728 100644 --- a/tests/unitary/default_setup/model/test_artifact_uploader.py +++ b/tests/unitary/default_setup/model/test_artifact_uploader.py @@ -29,6 +29,7 @@ def setup_class(cls): cls.mock_artifact_zip_path = os.path.join( cls.curr_dir, "test_files/model_artifacts.zip" ) + cls.mock_oci_artifact_path = "oci://bucket-name@namespace/model_artifacts.zip" def teardown_class(cls): # if os.path.exists(cls.mock_artifact_path): @@ -73,6 +74,32 @@ def test__init__(self): overwrite_existing_artifact=False, remove_existing_artifact=False, ) + + invalid_artifact_path = "oci://my-bucket@my-tenancy/mymodel" + with pytest.raises(ValueError): + lg_artifact_uploader = LargeArtifactUploader( + dsc_model=self.mock_dsc_model, + artifact_path=invalid_artifact_path, + auth=self.mock_auth, + region=self.mock_region, + overwrite_existing_artifact=False, + remove_existing_artifact=False, + ) + + with patch("ads.common.utils.is_path_exists", return_value=False): + with pytest.raises( + ValueError, + match=f"The `{self.mock_oci_artifact_path}` does not exist", + ): + lg_artifact_uploader = LargeArtifactUploader( + dsc_model=self.mock_dsc_model, + artifact_path=self.mock_oci_artifact_path, + auth=self.mock_auth, + region=self.mock_region, + overwrite_existing_artifact=False, + remove_existing_artifact=False, + ) + auth = default_signer() lg_artifact_uploader = LargeArtifactUploader( dsc_model=self.mock_dsc_model, @@ -97,14 +124,25 @@ def test__init__(self): == DEFAULT_PARALLEL_PROCESS_COUNT ) - def test_prepare_artiact_tmp_zip(self): + with patch("ads.common.utils.is_path_exists", return_value=True): + uploader = LargeArtifactUploader( + dsc_model=self.mock_dsc_model, + artifact_path=self.mock_oci_artifact_path, + overwrite_existing_artifact=False, + remove_existing_artifact=False, + ) + assert uploader.artifact_path == self.mock_oci_artifact_path + assert uploader.bucket_uri == self.mock_oci_artifact_path + assert uploader.artifact_zip_path == None + + def test_prepare_artifact_tmp_zip(self): # Tests case when a folder provided as artifacts location with patch("ads.model.common.utils.zip_artifact") as mock_zip_artifact: mock_zip_artifact.return_value = "test_artifact.zip" artifact_uploader = SmallArtifactUploader( dsc_model=self.mock_dsc_model, artifact_path=self.mock_artifact_path ) - test_result = artifact_uploader._prepare_artiact_tmp_zip() + test_result = artifact_uploader._prepare_artifact_tmp_zip() assert test_result == "test_artifact.zip" # Tests case when a zip file provided as artifacts location @@ -114,17 +152,26 @@ def test_prepare_artiact_tmp_zip(self): dsc_model=self.mock_dsc_model, artifact_path=self.mock_artifact_path + ".zip", ) - test_result = artifact_uploader._prepare_artiact_tmp_zip() + test_result = artifact_uploader._prepare_artifact_tmp_zip() assert test_result == self.mock_artifact_path + ".zip" - def test_remove_artiact_tmp_zip(self): + # Tests case when a zip file provided as object storage path + with patch("ads.common.utils.is_path_exists", return_value=True): + artifact_uploader = LargeArtifactUploader( + dsc_model=self.mock_dsc_model, + artifact_path=self.mock_oci_artifact_path, + ) + test_result = artifact_uploader._prepare_artifact_tmp_zip() + assert test_result == 
self.mock_oci_artifact_path + + def test_remove_artifact_tmp_zip(self): artifact_uploader = SmallArtifactUploader( dsc_model=self.mock_dsc_model, artifact_path=self.mock_artifact_path ) with patch("shutil.rmtree") as mock_rmtree: # Tests case when tmp artifact needs to be removed artifact_uploader.artifact_zip_path = "artifacts.zip" - artifact_uploader._remove_artiact_tmp_zip() + artifact_uploader._remove_artifact_tmp_zip() mock_rmtree.assert_called_with("artifacts.zip", ignore_errors=True) with patch("os.path.exists", return_value=True): @@ -135,41 +182,44 @@ def test_remove_artiact_tmp_zip(self): # Tests case when tmp artifact shouldn't be removed artifact_uploader.artifact_zip_path = "artifacts.zip" artifact_uploader.artifact_path = "artifacts.zip" - artifact_uploader._remove_artiact_tmp_zip() + artifact_uploader._remove_artifact_tmp_zip() mock_rmtree.assert_not_called() @patch.object(SmallArtifactUploader, "_upload") - @patch.object(SmallArtifactUploader, "_prepare_artiact_tmp_zip") - @patch.object(SmallArtifactUploader, "_remove_artiact_tmp_zip") + @patch.object(SmallArtifactUploader, "_prepare_artifact_tmp_zip") + @patch.object(SmallArtifactUploader, "_remove_artifact_tmp_zip") def test_upload( - self, mock__remove_artiact_tmp_zip, mock__prepare_artiact_tmp_zip, mock__upload + self, + mock__remove_artifact_tmp_zip, + mock__prepare_artifact_tmp_zip, + mock__upload, ): artifact_uploader = SmallArtifactUploader( dsc_model=self.mock_dsc_model, artifact_path=self.mock_artifact_path ) artifact_uploader.upload() - mock__remove_artiact_tmp_zip.assert_called() - mock__prepare_artiact_tmp_zip.assert_called() + mock__remove_artifact_tmp_zip.assert_called() + mock__prepare_artifact_tmp_zip.assert_called() mock__upload.assert_called() def test_upload_small_artifact(self): with open(self.mock_artifact_zip_path, "rb") as file_data: with patch.object( SmallArtifactUploader, - "_prepare_artiact_tmp_zip", + "_prepare_artifact_tmp_zip", return_value=self.mock_artifact_zip_path, - ) as mock_prepare_artiact_tmp_zip: + ) as mock_prepare_artifact_tmp_zip: with patch.object( - SmallArtifactUploader, "_remove_artiact_tmp_zip" - ) as mock_remove_artiact_tmp_zip: + SmallArtifactUploader, "_remove_artifact_tmp_zip" + ) as mock_remove_artifact_tmp_zip: artifact_uploader = SmallArtifactUploader( dsc_model=self.mock_dsc_model, artifact_path=self.mock_artifact_path, ) artifact_uploader.artifact_zip_path = self.mock_artifact_zip_path artifact_uploader.upload() - mock_prepare_artiact_tmp_zip.assert_called() - mock_remove_artiact_tmp_zip.assert_called() + mock_prepare_artifact_tmp_zip.assert_called() + mock_remove_artifact_tmp_zip.assert_called() self.mock_dsc_model.create_model_artifact.assert_called() @patch("ads.common.utils.is_path_exists") @@ -215,6 +265,24 @@ def test_upload_large_artifact(self, mock_upload, mock_is_path_exists): ) artifact_uploader.upload() + @patch("ads.common.utils.is_path_exists", return_value=True) + @patch("ads.common.utils.upload_to_os") + def test_skip_upload(self, mock_upload, mock_is_path_exists): + """Tests case when provided artifact is object storage path.""" + artifact_uploader = LargeArtifactUploader( + dsc_model=self.mock_dsc_model, + artifact_path=self.mock_oci_artifact_path, + auth=default_signer(), + region=self.mock_region, + overwrite_existing_artifact=False, + remove_existing_artifact=False, + ) + artifact_uploader.upload() + mock_upload.assert_not_called() + self.mock_dsc_model.export_model_artifact.assert_called_with( + bucket_uri=self.mock_oci_artifact_path, 
region=self.mock_region + ) + def test_zip_artifact_fail(self): with pytest.raises(ValueError, match="The `artifact_dir` must be provided."): zip_artifact(None) From fa6f31501833a63a11efbf8c00aaccc1502f8846 Mon Sep 17 00:00:00 2001 From: Liuda Rudenka Date: Wed, 27 Sep 2023 17:08:58 -0500 Subject: [PATCH 17/17] ADS release 2.8.10 (#355) --- .gitleaks.toml | 1 + docs/source/release_notes.rst | 12 ++++++++++++ pyproject.toml | 6 +++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.gitleaks.toml b/.gitleaks.toml index f6dee79c4..ea1e8863d 100644 --- a/.gitleaks.toml +++ b/.gitleaks.toml @@ -13,6 +13,7 @@ useDefault = true '''example-password''', '''this-is-not-the-secret''', '''''', + '''security_token''', # NVIDIA_GPGKEY_SUM from public documentation: # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/base/Dockerfile '''d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87''' diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 04e1b9465..70ef1ba61 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -2,12 +2,24 @@ Release Notes ============= +2.8.10 +------ +Release date: September 27, 2023 + +* Improved the ``LargeArtifactUploader`` class to understand OCI paths to upload model artifacts to the model catalog by reference. +* Removed ``ADSDataset`` runtime dependency on ``geopandas``. +* Fixed a bug in the progress bar during model registration. +* Fixed a bug where session variable could be referenced before assignment. +* Fixed a bug with model artifact save. +* Fixed a bug with pipelines step. + 2.8.9 ----- Release date: September 5, 2023 * Upgraded the ``scikit-learn`` dependency to ``>=1.0``. * Upgraded the ``pandas`` dependency to ``>1.2.1,<2.1`` to allow you to use ADS with pandas 2.0. +* Implemented multi-part upload in the ``ArtifactUploader`` to upload model artifacts to the model catalog. * Fixed the "Attribute not found" error, when ``deploy()`` called twice in ``GenericModel``. * Fixed the fetch of the security token, when the relative path for the ``security_token_file`` is provided (used in session token-bases authentication). diff --git a/pyproject.toml b/pyproject.toml index f5700f404..0f337f229 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ build-backend = "flit_core.buildapi" # Required name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below -version = "2.8.9" +version = "2.8.10" # Optional description = "Oracle Accelerated Data Science SDK" @@ -57,7 +57,7 @@ dependencies = [ "asteval>=0.9.25", "cerberus>=1.3.4", "cloudpickle>=1.6.0", - "fsspec>=0.8.7,<2023.9.1", # v2.9.1 introduced issues, releved by unit tests + "fsspec>=0.8.7,<2023.9.1", # v2.9.1 introduced issues, revealed by unit tests "gitpython>=3.1.2", "jinja2>=2.11.2", "matplotlib>=3.1.3", @@ -109,7 +109,7 @@ onnx = [ "lightgbm==3.3.1", "onnx>=1.12.0", "onnxmltools>=1.10.0", - "onnxruntime>=1.10.0,<1.16", # v1.16 introduced issues https://github.com/microsoft/onnxruntime/issues/17631, releved by unit tests + "onnxruntime>=1.10.0,<1.16", # v1.16 introduced issues https://github.com/microsoft/onnxruntime/issues/17631, revealedd by unit tests "oracle_ads[viz]", "protobuf<=3.20", "skl2onnx>=1.10.4",