diff --git a/0_model_training_pipeline.ipynb b/0_model_training_pipeline.ipynb index 6d37672..00850ae 100644 --- a/0_model_training_pipeline.ipynb +++ b/0_model_training_pipeline.ipynb @@ -75,8 +75,8 @@ }, "outputs": [], "source": [ - "# import sys\n", - "# !{sys.executable} -m pip install -r requirements.txt" + "import sys\n", + "!{sys.executable} -m pip install -r requirements.txt --upgrade-strategy only-if-needed" ] }, { @@ -747,7 +747,7 @@ "source": [ "# Start pipeline with credit data and preprocessing script\n", "execution = pipeline.start(\n", - " execution_display_name=pipeline.name\n", + " execution_display_name=pipeline.name,\n", " parameters=dict(\n", " AccuracyConditionThreshold=config['evaluation_step']['accuracy_condition_threshold'],\n", " MaximumParallelTrainingJobs=config['tuning_step']['maximum_parallel_training_jobs'],\n", diff --git a/1_batch_transform_pipeline.ipynb b/1_batch_transform_pipeline.ipynb index c156651..f99694b 100644 --- a/1_batch_transform_pipeline.ipynb +++ b/1_batch_transform_pipeline.ipynb @@ -30,8 +30,8 @@ }, "outputs": [], "source": [ - "#import sys\n", - "#!{sys.executable} -m pip install -r requirements.txt" + "import sys\n", + "!{sys.executable} -m pip install -r requirements.txt --upgrade-strategy only-if-needed" ] }, { @@ -65,9 +65,8 @@ "from sagemaker.workflow.execution_variables import ExecutionVariables\n", "\n", "from sagemaker.model import Model\n", - "from sagemaker.inputs import CreateModelInput\n", - "from sagemaker.workflow.model_step import ModelStep\n", - "from sagemaker.transformer import Transformer" + "from sagemaker.transformer import Transformer\n", + "from sagemaker.workflow.model_step import ModelStep" ] }, { diff --git a/2_realtime_inference.ipynb b/2_realtime_inference.ipynb index bdb0aa0..689fb71 100644 --- a/2_realtime_inference.ipynb +++ b/2_realtime_inference.ipynb @@ -28,8 +28,8 @@ }, "outputs": [], "source": [ - "#import sys\n", - "#!{sys.executable} -m pip install -r requirements.txt" + "import 
sys\n", + "!{sys.executable} -m pip install -r requirements.txt --upgrade-strategy only-if-needed" ] }, { @@ -52,7 +52,6 @@ "import sagemaker.session\n", "from datetime import datetime\n", "from typing import Dict, List\n", - "from utils import load_config, print_pipeline_execution_summary\n", "from sagemaker.workflow.pipeline_context import PipelineSession" ] }, diff --git a/README.md b/README.md index f199988..b49c8c5 100644 --- a/README.md +++ b/README.md @@ -138,11 +138,10 @@ Setup a secret in Secrets Manager for the PrestoDB username and password. Call t 1. Edit the [`config`](./config.yml) as per PrestoDB connection, IAM role and other pipeline details such as instance types for various pipeline steps etc. - - [**Mandatory**] Edit the parameter values in the `presto` section. - - [**Mandatory**] Edit the parameter values in the `aws` section. - - [**Mandatory**] Edit the `query` parameter value in the `training_step` section. This is the query for retrieving the training data from the PrestoDB. - - [**Mandatory**] Edit the `query` parameter value in the `transform_step` section. This is the query for retrieving the data for the batch transform from the PrestoDB. - - [Optional] Edit the parameter values in the rest of the sections as appropriate. + - Edit the parameter values in the `presto` section. These parameters define the connectivity to PrestoDB. + - Edit the parameter values in the `aws` section. These parameters define the IAM role, bucket name, region and other AWS cloud related parameters. + - Edit the parameter values in the sections corresponding to the pipeline steps i.e. `training_step`, `tuning_step`, `transform_step` etc. Review all the parameters in these sections carefully and edit them as appropriate for your use-case. + - Review the parameters in the rest of the sections of the [`config`](./config.yml) and edit them if needed. 1. 
Run the [`0_model_training_pipeline`](./0_model_training_pipeline.ipynb) notebook to train and tune the ML model and register it with the SageMaker model registry. All the steps in this notebook are executed as part of a training pipeline. - This notebook also contains an automatic model approval step that changes the state of the model registered with the model registry from `PendingForApproval` to `Approved` state. This step can be removed for prod accounts where manual or some criteria based approval would be required. diff --git a/code/query.py b/code/query.py deleted file mode 100644 index e411ce7..0000000 --- a/code/query.py +++ /dev/null @@ -1,24 +0,0 @@ -BATCH_INFERENCE_QUERY="""SELECT - o.orderkey, - COUNT(l.linenumber) AS lineitem_count, - SUM(l.quantity) AS total_quantity, - AVG(l.discount) AS avg_discount, - SUM(l.extendedprice) AS total_extended_price, - o.orderdate, - o.orderpriority, - CASE - WHEN SUM(l.extendedprice) > 20000 THEN 1 - ELSE 0 - END AS high_value_order -FROM - orders o -JOIN - lineitem l ON o.orderkey = l.orderkey -GROUP BY - o.orderkey, - o.orderdate, - o.orderpriority -ORDER BY - RANDOM() -LIMIT 5000 -""" \ No newline at end of file diff --git a/config.yml b/config.yml index 2bf35ea..d2fdd2c 100644 --- a/config.yml +++ b/config.yml @@ -2,7 +2,7 @@ aws: region: us-east-1 # execution role, replace the role name below with the one you are using - sagemaker_execution_role_name: your-role-name + sagemaker_execution_role_name: your-sagemaker-execution-role # the execution role ARN is determined automatically by the code sagemaker_execution_role_arn: arn:aws:iam::{account_id}:role/{role} s3_bucket: sagemaker-{region}-{account_id} # region and account id are automatically replaced