-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d94c212
commit 2e6eb86
Showing
7 changed files
with
1,713 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"Configuration":{"Script":null,"UseAbsolutePath":true,"Arguments":[],"SourceDirectoryDataStore":null,"Framework":0,"Target":"ComputeCluster-Coursera-ML","DataReferences":{},"Data":{},"OutputData":{},"Datacaches":[],"JobName":null,"MaxRunDurationSeconds":null,"NodeCount":null,"InstanceTypes":[],"Priority":null,"CredentialPassthrough":false,"Identity":null,"Environment":{"AutoRebuild":true,"Python":{"InterpreterPath":null,"UserManagedDependencies":false,"CondaDependencies":null,"BaseCondaEnvironment":null},"EnvironmentVariables":{},"Docker":{"BaseImage":null,"Platform":{"Os":"Linux","Architecture":"amd64"},"BaseDockerfile":null,"BaseImageRegistry":null},"Spark":{"Repositories":[],"Packages":[],"PrecachePackages":true},"InferencingStackVersion":null},"History":{"OutputCollection":true,"DirectoriesToWatch":["logs"],"EnableMLflowTracking":true},"Spark":{"Configuration":{}},"ParallelTask":{"MaxRetriesPerWorker":0,"WorkerCountPerNode":1,"TerminalExitCodes":null,"Configuration":{}},"BatchAi":{"NodeCount":0},"AmlCompute":{"Name":null,"VmSize":null,"RetainCluster":false,"ClusterMaxNodeCount":null},"AISuperComputer":{"InstanceType":"D2","FrameworkImage":null,"ImageVersion":null,"Location":null,"AISuperComputerStorageData":null,"Interactive":false,"ScalePolicy":null,"VirtualClusterArmId":null,"TensorboardLogDirectory":null,"SSHPublicKey":null,"SSHPublicKeys":null,"EnableAzmlInt":true,"Priority":"Medium","SLATier":"Standard","UserAlias":null},"KubernetesCompute":{"InstanceType":null},"Tensorflow":{"WorkerCount":0,"ParameterServerCount":0},"Mpi":{"ProcessCountPerNode":0},"PyTorch":{"CommunicationBackend":null,"ProcessCount":null},"Hdi":{"YarnDeployMode":0},"ContainerInstance":{"Region":null,"CpuCores":2.0,"MemoryGb":3.5},"ExposedPorts":null,"Docker":{"UseDocker":true,"SharedVolumes":null,"ShmSize":null,"Arguments":null},"Cmk8sCompute":{"Configuration":{}},"CommandReturnCodeConfig":{"ReturnCode":0,"SuccessfulReturnCodes":[]},"EnvironmentVariables":{},"ApplicationEndpoints":{},"Parameters":[]},"Attribution":null,"TelemetryValues":null,"Overrides":null,"SnapshotId":null,"Snapshots":[],"SourceCodeDataReference":null,"ParentRunId":null,"DataContainerId":null,"RunType":null,"DisplayName":null,"EnvironmentAssetId":null,"Properties":{},"Tags":{},"AggregatedArtifactPath":null} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"Configuration":{"Script":null,"UseAbsolutePath":true,"Arguments":[],"SourceDirectoryDataStore":null,"Framework":0,"Target":"ComputeCluster-Coursera-ML","DataReferences":{},"Data":{},"OutputData":{},"Datacaches":[],"JobName":null,"MaxRunDurationSeconds":null,"NodeCount":null,"InstanceTypes":[],"Priority":null,"CredentialPassthrough":false,"Identity":null,"Environment":{"Name":"AzureML-AutoML","Version":"177","AssetId":"azureml://registries/azureml/environments/AzureML-AutoML/versions/177","AutoRebuild":true,"Python":{"InterpreterPath":"python","UserManagedDependencies":true,"CondaDependencies":null,"BaseCondaEnvironment":null},"EnvironmentVariables":{"EXAMPLE_ENV_VAR":"EXAMPLE_VALUE"},"Docker":{"BaseImage":null,"Platform":{"Os":"Linux","Architecture":"amd64"},"BaseDockerfile":"FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1\n\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/azureml-automl\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\n\nCOPY --from=mcr.microsoft.com/azureml/mlflow-ubuntu20.04-py38-cpu-inference:20230306.v3 /var/mlflow_resources/mlflow_score_script.py /var/mlflow_resources/mlflow_score_script.py\n\nENV MLFLOW_MODEL_FOLDER=\"mlflow-model\"\n# ENV AML_APP_ROOT=\"/var/mlflow_resources\"\n# ENV AZUREML_ENTRY_SCRIPT=\"mlflow_score_script.py\"\n\nENV ENABLE_METADATA=true\n\n# begin conda create\n# Create conda environment\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\n python=3.9 \\\n # begin conda dependencies\n pip \\\n py-cpuinfo=5.0.0 \\\n joblib=1.2.0 \\\n setuptools-git \\\n 'psutil>5.0.0,<6.0.0' \\\n # end conda dependencies\n -c conda-forge -c pytorch -c anaconda && \\\n conda run -p $AZUREML_CONDA_ENVIRONMENT_PATH && \\\n conda clean -a -y\n# end conda create\n\n# begin pip install\n# Install pip dependencies\nRUN pip install \\\n # begin pypi dependencies\n 'azureml-core==1.56.0' \\\n 'azureml-mlflow==1.56.0' \\\n 'azureml-pipeline-core==1.56.0' \\\n 'azureml-telemetry==1.56.0' \\\n 'azureml-interpret==1.56.0' \\\n 'azureml-responsibleai==1.56.0' \\\n 'azureml-automl-core==1.56.0.post1' \\\n 'azureml-automl-runtime==1.56.0.post1' \\\n 'azureml-train-automl-client==1.56.0' \\\n 'azureml-train-automl-runtime==1.56.0.post1' \\\n 'azureml-train-automl==1.56.0' \\\n 'azureml-dataset-runtime==1.56.0' \\\n 'azureml-defaults==1.56.0.post1' \\\n # TODO: replace the hard coded above by a referenceto azureml-train-automl[tabular]\n 'mlflow-skinny==2.9.2' \\\n 'torch==1.13.1' \\\n 'xgboost==1.5.2' \\\n 'prophet==1.1.4' \\\n 'inference-schema' \\\n 'mltable>=1.0.0'\n # end pypi dependencies\n# end pip install\n\n# begin pip ad-hoc\n# Install pip ad-hoc dependencies for security updates\nRUN pip install --upgrade 'pyarrow==14.0.2'\n# end pip ad-hoc\n\n","BaseImageRegistry":{"Address":null,"Username":null,"Password":null},"Enabled":false,"Arguments":[]},"Spark":{"Repositories":[],"Packages":[],"PrecachePackages":true},"InferencingStackVersion":null},"History":{"OutputCollection":true,"DirectoriesToWatch":["logs"],"EnableMLflowTracking":true},"Spark":{"Configuration":{}},"ParallelTask":{"MaxRetriesPerWorker":0,"WorkerCountPerNode":1,"TerminalExitCodes":null,"Configuration":{}},"BatchAi":{"NodeCount":0},"AmlCompute":{"Name":null,"VmSize":null,"RetainCluster":false,"ClusterMaxNodeCount":null},"AISuperComputer":{"InstanceType":"D2","FrameworkImage":null,"ImageVersion":null,"Location":null,"AISuperComputerStorageData":null,"Interactive":false,"ScalePolicy":null,"VirtualClusterArmId":null,"TensorboardLogDirectory":null,"SSHPublicKey":null,"SSHPublicKeys":null,"EnableAzmlInt":true,"Priority":"Medium","SLATier":"Standard","UserAlias":null},"KubernetesCompute":{"InstanceType":null},"Tensorflow":{"WorkerCount":0,"ParameterServerCount":0},"Mpi":{"ProcessCountPerNode":0},"PyTorch":{"CommunicationBackend":null,"ProcessCount":null},"Hdi":{"YarnDeployMode":0},"ContainerInstance":{"Region":null,"CpuCores":2.0,"MemoryGb":3.5},"ExposedPorts":null,"Docker":{"UseDocker":true,"SharedVolumes":null,"ShmSize":null,"Arguments":null},"Cmk8sCompute":{"Configuration":{}},"CommandReturnCodeConfig":{"ReturnCode":0,"SuccessfulReturnCodes":[]},"EnvironmentVariables":{"AUTOML_SDK_RESOURCE_URL":"https://aka.ms/automl-resources/"},"ApplicationEndpoints":{},"Parameters":[]},"Attribution":null,"TelemetryValues":null,"Overrides":null,"SnapshotId":null,"Snapshots":[],"SourceCodeDataReference":null,"ParentRunId":null,"DataContainerId":null,"RunType":null,"DisplayName":null,"EnvironmentAssetId":null,"Properties":{},"Tags":{},"AggregatedArtifactPath":null} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Conda environment specification. The dependencies defined in this file will | ||
# be automatically provisioned for runs with userManagedDependencies=False. | ||
|
||
# Details about the Conda environment file format: | ||
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually | ||
|
||
name: project_environment | ||
dependencies: | ||
# The python interpreter version. | ||
# Currently Azure ML only supports 3.8 and later. | ||
- python=3.9.19 | ||
|
||
- pip: | ||
- azureml-train-automl-runtime==1.56.0.post1 | ||
- inference-schema | ||
- xgboost<=1.5.2 | ||
- azureml-interpret==1.56.0 | ||
- azureml-defaults==1.56.0.post1 | ||
- numpy==1.23.5 | ||
- pandas==1.3.5 | ||
- scikit-learn==1.1.3 | ||
- prophet==1.1.4 | ||
- holidays==0.52 | ||
- psutil==5.9.3 | ||
channels: | ||
- anaconda | ||
- conda-forge |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,349 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"\n", | ||
"\n", | ||
"https://ml.azure.com/runs/ashy_town_l9p62xl934_coursera_job1_20?wsid=/subscriptions/07d0a5db-6861-421a-ade5-a762db44e8e7/resourcegroups/RG-Coursera-AzureML/workspaces/Studio-Coursera-AzureML" | ||
], | ||
"metadata": { | ||
"automl_codegen": { | ||
"arguments": [ | ||
"automl_child_run_id", | ||
"child_run_url" | ||
] | ||
} | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# Train using Azure Machine Learning Compute\n", | ||
"\n", | ||
"* Connect to an Azure Machine Learning Workspace\n", | ||
"* Use existing compute target or create new\n", | ||
"* Configure & Run command\n" | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Prerequisites\n", | ||
"Please ensure Azure Machine Learning Python SDK v2 is installed on the machine running Jupyter." | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"## Connect to a Workspace\n", | ||
"\n", | ||
"Initialize a workspace object from the previous experiment. " | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# Import the required libraries\n", | ||
"from azure.identity import DefaultAzureCredential\n", | ||
"from azure.ai.ml import MLClient\n", | ||
"\n", | ||
"# The workspace information from the previous experiment has been pre-filled for you.\n", | ||
"subscription_id = \"***************\". ## Place holder. Provide the subscription id\n", | ||
"resource_group = \"****************\". ## Place holder. Provide the resource group\n", | ||
"workspace_name = \"*****************\". ## Place holder. Provide the workspace\n", | ||
"\n", | ||
"credential = DefaultAzureCredential()\n", | ||
"ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)\n", | ||
"workspace = ml_client.workspaces.get(name=ml_client.workspace_name)\n", | ||
"print(ml_client.workspace_name, workspace.resource_group, workspace.location, ml_client.connections._subscription_id, sep = '\\n')" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": { | ||
"tags": [ | ||
"create workspace" | ||
], | ||
"automl_codegen": { | ||
"arguments": [ | ||
"subscription_id", | ||
"resource_group", | ||
"workspace_name" | ||
] | ||
} | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"### Create project directory\n", | ||
"\n", | ||
"Create a directory that will contain the training script that you will need access to on the remote resource." | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"import os\n", | ||
"import shutil\n", | ||
"\n", | ||
"project_folder = os.path.join(\".\", 'code_folder')\n", | ||
"os.makedirs(project_folder, exist_ok=True)\n", | ||
"shutil.copy('script.py', project_folder)" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"### Use existing compute target or create new (Basic)\n", | ||
"\n", | ||
"Azure Machine Learning Compute is managed compute infrastructure that allows the user to easily create single to multi-node compute of the appropriate VM Family. It is created **within your workspace region** and is a resource that can be used by other users in your workspace. It autoscales by default to the max_nodes, when a job is submitted, and executes in a containerized environment packaging the dependencies as specified by the user. \n", | ||
"\n", | ||
"Since it is managed compute, job scheduling and cluster management are handled internally by Azure Machine Learning service. \n", | ||
"\n", | ||
"A compute cluster can be created using the `AmlCompute` class. Some of the key parameters of this class are:\n", | ||
"\n", | ||
"* `size` - The VM size to use for the cluster. For more information, see [Supported VM series and sizes](https://docs.microsoft.com/en-us/azure/machine-learning/concept-compute-target#supported-vm-series-and-sizes).\n", | ||
"* `max_instances` - The maximum number of nodes to use on the cluster. Default is 1." | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from azure.ai.ml.entities import AmlCompute\n", | ||
"\n", | ||
"# Choose a name for your CPU cluster\n", | ||
"cluster_name = \"ComputeCluster-Coursera-ML\"\n", | ||
"\n", | ||
"# Verify that cluster does not exist already\n", | ||
"try:\n", | ||
" cluster = ml_client.compute.get(cluster_name)\n", | ||
" print('Found existing cluster, use it.')\n", | ||
"except Exception:\n", | ||
" compute = AmlCompute(name=cluster_name, size='Standard_D11_v2',\n", | ||
" max_instances=4)\n", | ||
" cluster = ml_client.compute.begin_create_or_update(compute)\n" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": { | ||
"tags": [ | ||
"sample-amlcompute-provision" | ||
], | ||
"automl_codegen": { | ||
"arguments": [ | ||
"compute_target", | ||
"compute_sku" | ||
] | ||
} | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"### Configure & Run\n", | ||
"The environment and compute has been pre-filled from the original training job. More information can be found here:\n", | ||
"\n", | ||
"`command`: https://docs.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml?view=azure-python-preview#azure-ai-ml-command\n", | ||
"\n", | ||
"`environment`: https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments#automated-ml-automl\n", | ||
"\n", | ||
"`compute`: https://docs.microsoft.com/en-us/python/api/azure-ai-ml/azure.ai.ml.entities.amlcompute?view=azure-python-preview\n", | ||
"\n" | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# To test the script with an environment referenced by a custom yaml file, uncomment the following lines and replace the `conda_file` value with the path to the yaml file.\n", | ||
"# Set the value of `environment` in the `command` job below to `env`.\n", | ||
"\n", | ||
"# env = Environment(\n", | ||
"# name=\"automl-tabular-env\",\n", | ||
"# description=\"environment for automl inference\",\n", | ||
"# image=\"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20210727.v1\",\n", | ||
"# conda_file=\"conda.yaml\",\n", | ||
"# )" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"from azure.ai.ml import command, Input\n", | ||
"\n", | ||
"# To test with new training / validation datasets, replace the default dataset id(s)/uri(s) taken from parent run below\n", | ||
"command_str = 'python script.py --training_dataset_uri azureml://locations/eastus2/workspaces/097cb032-e51a-422a-b073-921eaf71b3fa/data/coursera_daily_rentals/versions/1'\n", | ||
"command_job = command(\n", | ||
" code=project_folder,\n", | ||
" command=command_str,\n", | ||
" tags=dict(automl_child_run_id='ashy_town_l9p62xl934_coursera_job1_20'),\n", | ||
" environment='AzureML-AutoML:177',\n", | ||
" compute='ComputeCluster-Coursera-ML',\n", | ||
" experiment_name='Default_coursera_experiment_1')\n", | ||
" \n", | ||
"returned_job = ml_client.create_or_update(command_job)\n", | ||
"returned_job.studio_url" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": { | ||
"automl_codegen": { | ||
"arguments": [ | ||
"script_filename", | ||
"script_arguments", | ||
"environment_name", | ||
"environment_version", | ||
"compute_target", | ||
"experiment_name", | ||
"automl_child_run_id" | ||
] | ||
} | ||
} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"### Initialize MLFlow Client\n", | ||
"The metrics and artifacts for the run can be accessed via the MLFlow interface. \n", | ||
"Initialize the MLFlow client here, and set the backend as Azure ML, via. the MLFlow Client.\n", | ||
"\n", | ||
"*IMPORTANT*, you need to have installed the latest MLFlow packages with:\n", | ||
"\n", | ||
" pip install azureml-mlflow\n", | ||
"\n", | ||
" pip install mlflow" | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# %pip install azureml-mlflow\n", | ||
"# %pip install mlflow" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"import mlflow\n", | ||
"\n", | ||
"# Obtain the tracking URL from MLClient\n", | ||
"MLFLOW_TRACKING_URI = ml_client.workspaces.get(\n", | ||
" name=ml_client.workspace_name\n", | ||
").mlflow_tracking_uri\n", | ||
"\n", | ||
"# Set the MLFLOW TRACKING URI\n", | ||
"\n", | ||
"mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n", | ||
"\n", | ||
"# Retrieve the metrics logged to the run.\n", | ||
"from mlflow.tracking.client import MlflowClient\n", | ||
"\n", | ||
"# Initialize MLFlow client\n", | ||
"mlflow_client = MlflowClient()\n", | ||
"mlflow_run = mlflow_client.get_run(returned_job.name)\n", | ||
"mlflow_run.data.metrics\n" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"### Download Fitted Model\n", | ||
"Download the resulting fitted model to the local folder in `local_dir`." | ||
], | ||
"metadata": {} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"# import os\n", | ||
"\n", | ||
"# Create local folder\n", | ||
"# local_dir = \"./artifact_downloads\"\n", | ||
"# if not os.path.exists(local_dir):\n", | ||
"# os.mkdir(local_dir)\n", | ||
"# Download run's artifacts/outputs\n", | ||
"# local_path = mlflow_client.download_artifacts(\n", | ||
"# mlflow_run.info.run_id, \"outputs\", local_dir# )\n", | ||
"# print(\"Artifacts downloaded in: {}\".format(local_path))\n", | ||
"# print(\"Artifacts: {}\".format(os.listdir(local_path)))\n" | ||
], | ||
"outputs": [], | ||
"execution_count": null, | ||
"metadata": {} | ||
} | ||
], | ||
"metadata": { | ||
"index_order": 1, | ||
"automl_sdk_version": "1.56.0", | ||
"exclude_from_index": false, | ||
"task": "Submit a run on Azure Machine Learning Compute.", | ||
"deployment": [ | ||
"None" | ||
], | ||
"authors": [ | ||
{ | ||
"name": "vivijay" | ||
} | ||
], | ||
"microsoft": { | ||
"ms_spell_check": { | ||
"ms_spell_check_language": "en" | ||
} | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.10" | ||
}, | ||
"compute": [ | ||
"AML Compute" | ||
], | ||
"kernelspec": { | ||
"name": "python310-sdkv2", | ||
"language": "python", | ||
"display_name": "Python 3.10 - SDK V2" | ||
}, | ||
"tags": [ | ||
"None" | ||
], | ||
"datasets": [ | ||
"Diabetes" | ||
], | ||
"category": "training", | ||
"framework": [ | ||
"None" | ||
], | ||
"friendly_name": "Train on Azure Machine Learning Compute", | ||
"nteract": { | ||
"version": "nteract-front-end@1.0.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.