Skip to content

Commit

Permalink
sample: flow in pipeline (#2620)
Browse files Browse the repository at this point in the history
* doc: flow in pipeline for cli

* doc: flow in pipeline sdk experience

* doc: remind to grant permission to compute cluster

* Update README.md

* Update README.md

* Update flow_in_pipeline.ipynb

* update readme for cli sample

* move bs4 inside tool function to reduce dependency for compile

* doc: add information for office usage

* doc: update component spec glob

* feat: add requirements

* azure-ai-ml is released

* fix: smoke

* fix: resolve comments

* ci: add ci for flow in pipeline

* doc: update readme

* fix: smoke

* create connection with wrong settings

* fix: smoke

* fix: highlight schema requirement

* feat: use a sample without connection

* fix: include flow.tools.json

* fix: update text

* fix: remove connection creation

* fix: update input setting

* fix: remove connection reference in python tool

* fix: flow run setting

* fix: further clean connection reference

* feat: use the same pipeline in cli and sdk

* fix: black

* fix: make 2 example align
  • Loading branch information
elliotzh authored Oct 19, 2023
1 parent cb0cbee commit 5bb2997
Show file tree
Hide file tree
Showing 16 changed files with 654 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline
on:
  workflow_dispatch:
  schedule:
    - cron: "4 6/12 * * *"
  pull_request:
    branches:
      - main
    # Only changes to the sample itself, the shared bootstrapping scripts,
    # this workflow file, or the CLI helper scripts trigger a PR run.
    paths:
      - cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component/**
      - infra/bootstrapping/**
      - .github/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml
      - cli/run-pipeline-jobs.sh
      - cli/setup.sh
# One in-flight run per workflow and PR (or ref); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        # v2 runs on a Node runtime that GitHub has retired; v4 is the
        # maintained major and takes the same default inputs.
        uses: actions/checkout@v4
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        # A failure in this step does not fail the job; the next step still runs.
        continue-on-error: true
      - name: run job
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash -x ../../../run-job.sh pipeline.yml
        working-directory: cli/jobs/pipelines-with-components/pipeline_job_with_flow_as_component
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# This code is autogenerated.
# Code is generated by running custom script: python3 readme.py
# Any manual changes to this file may cause incorrect behavior.
# Any manual changes will be overwritten if the code is regenerated.

name: sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline
# This file is created by sdk/python/readme.py.
# Please do not edit directly.
on:
  workflow_dispatch:
  schedule:
    - cron: "41 6/12 * * *"
  pull_request:
    branches:
      - main
    # Only changes to the notebook folder, this workflow file, the SDK dev
    # requirements, or the shared bootstrapping/setup scripts trigger a PR run.
    paths:
      - sdk/python/jobs/pipelines/1l_flow_in_pipeline/**
      - .github/workflows/sdk-jobs-pipelines-1l_flow_in_pipeline-flow_in_pipeline.yml
      - sdk/python/dev-requirements.txt
      - infra/bootstrapping/**
      - sdk/python/setup.sh
# One in-flight run per workflow and PR (or ref); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: check out repo
        # v2 runs on a Node runtime that GitHub has retired; v4 is the
        # maintained major and takes the same default inputs.
        uses: actions/checkout@v4
      - name: setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.8"
      - name: pip install notebook reqs
        run: pip install -r sdk/python/dev-requirements.txt
      - name: azure login
        uses: azure/login@v1
        with:
          creds: ${{secrets.AZUREML_CREDENTIALS}}
      - name: bootstrap resources
        run: |
          echo '${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}';
          bash bootstrap.sh
        working-directory: infra/bootstrapping
        continue-on-error: false
      - name: setup SDK
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: sdk/python
        # A failure in this step does not fail the job; later steps still run.
        continue-on-error: true
      - name: setup-cli
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash setup.sh
        working-directory: cli
        # A failure in this step does not fail the job; later steps still run.
        continue-on-error: true
      # NOTE(review): the workspace config is generated as
      # "../../.azureml/config.json", but the existence check below looks for
      # "../../.azureml/config", so the `cat` presumably never runs. Left as-is
      # on purpose: pointing it at config.json would start printing workspace
      # identifiers into the job log - confirm intent before changing.
      - name: run jobs/pipelines/1l_flow_in_pipeline/flow_in_pipeline.ipynb
        run: |
          source "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh";
          source "${{ github.workspace }}/infra/bootstrapping/init_environment.sh";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" generate_workspace_config "../../.azureml/config.json";
          bash "${{ github.workspace }}/infra/bootstrapping/sdk_helpers.sh" replace_template_values "flow_in_pipeline.ipynb";
          [ -f "../../.azureml/config" ] && cat "../../.azureml/config";
          papermill -k python flow_in_pipeline.ipynb flow_in_pipeline.output.ipynb
        working-directory: sdk/python/jobs/pipelines/1l_flow_in_pipeline
      - name: upload notebook's working folder as an artifact
        # Runs even when the notebook step fails so its output can be inspected.
        if: ${{ always() }}
        # upload-artifact v1/v2 have been retired by GitHub and now fail the
        # job; v4 accepts the same `name` / `path` inputs used here.
        uses: actions/upload-artifact@v4
        with:
          name: flow_in_pipeline
          path: sdk/python/jobs/pipelines/1l_flow_in_pipeline
1 change: 1 addition & 0 deletions cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ path|status|description
[jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml](jobs/pipelines-with-components/image_classification_with_densenet/pipeline.yml)|[![jobs/pipelines-with-components/image_classification_with_densenet/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-image_classification_with_densenet-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-image_classification_with_densenet-pipeline.yml)|Train densenet for image classification
[jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline.yml](jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline.yml)|[![jobs/pipelines-with-components/nyc_taxi_data_regression/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-pipeline.yml)|Train regression model based on nyc taxi dataset
[jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline.yml](jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline.yml)|[![jobs/pipelines-with-components/nyc_taxi_data_regression/single-job-pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-single-job-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-nyc_taxi_data_regression-single-job-pipeline.yml)|Single job pipeline to train regression model based on nyc taxi dataset
[jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline.yml](jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_job_with_flow_as_component/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_job_with_flow_as_component-pipeline.yml)|The hello world pipeline job with flow as component
[jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline.yml](jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_hyperparameter_sweep/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_hyperparameter_sweep-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_hyperparameter_sweep-pipeline.yml)|Tune hyperparameters using TF component
[jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline.yml](jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/data_pipeline/data_pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-data_pipeline-data_pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-data_pipeline-data_pipeline.yml)|pipeline component with data prep and transformation
[jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline.yml](jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline.yml)|[![jobs/pipelines-with-components/pipeline_with_pipeline_component/nyc_taxi_data_regression_with_pipeline_component/pipeline](https://github.com/Azure/azureml-examples/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-pipeline/badge.svg?branch=main)](https://github.com/Azure/azureml-examples/actions/workflows/cli-jobs-pipelines-with-components-pipeline_with_pipeline_component-nyc_taxi_data_regression_with_pipeline_component-pipeline.yml)|Train regression model based on nyc taxi dataset
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
This is a dummy pipeline job that references a flow anonymously as a component. The flow directory is copied from the [sample in the promptflow repository](https://github.com/microsoft/promptflow/tree/main/examples/flows/standard/basic), with the connection dependency removed so that no promptflow connection is needed in the Azure ML examples repository.

Prerequisites:
1. `.promptflow/flow.tools.json` in the flow directory is required to use a flow as a component. Usually you can use `pf flow validate` or `pf run validate` to generate it.
2. You should either update connection name in `flow.dag.yaml` or update `connection.yaml` with your own api information and use `pf connection create --file connection.yaml` to create a workspace connection.
3. You need to either edit the compute cluster in `pipeline.yml` or create a compute cluster named `cpu-cluster` in your workspace.
4. Please ensure that `$schema` is specified in both your `flow.dag.yaml` and `run.yaml`:
1. `flow.dag.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json`
2. `run.yaml`: `$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Run.schema.json`

After that, you can run `az ml job create --file pipeline.yml` to submit the pipeline job.

References:
- [microsoft/promptflow: Build high-quality LLM apps](https://github.com/microsoft/promptflow)
- [Reference - Prompt flow documentation](https://microsoft.github.io/promptflow/reference/index.html)
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
  "package": {},
  "code": {
    "hello.jinja2": {
      "type": "prompt",
      "inputs": {
        "text": {
          "type": ["string"]
        }
      },
      "description": "Please replace the template with your own prompt.",
      "source": "hello.jinja2"
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Basic standard flow
A basic standard flow using custom python tool that calls Azure OpenAI with connection info stored in environment variables.

Tools used in this flow:
- `prompt` tool
- custom `python` Tool

Connections used in this flow:
- None

## Prerequisites

Install promptflow sdk and other dependencies:
```bash
pip install -r requirements.txt
```

## Run flow

- Prepare your Azure OpenAI resource by following these [instructions](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one.

- Setup environment variables

Ensure you have put your Azure OpenAI endpoint and API key in the [.env](.env) file. You can create one by referring to this [example file](.env.example).

```bash
cat .env
```

- Test flow/node
```bash
# test with default input value in flow.dag.yaml
pf flow test --flow .

# test with flow inputs
pf flow test --flow . --inputs text="Java Hello World!"

# test node with inputs
pf flow test --flow . --node llm --inputs prompt="Write a simple Hello World program that displays the greeting message when executed."
```

- Create run with multiple lines data
```bash
# using environment from .env file (loaded in user code: hello.py)
pf run create --flow . --data ./data.jsonl --stream
```

- List and show run meta
```bash
# list created run
pf run list

# get a sample run name
name=$(pf run list -r 10 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')

# show specific run detail
pf run show --name $name

# show output
pf run show-details --name $name

# visualize run in browser
pf run visualize --name $name
```

## Run flow with connection
Storing connection info in a plaintext `.env` file is not safe. We recommend using `pf connection` to keep secrets like `api_key` from leaking.

- Show or create `open_ai_connection`
```bash
# create connection from `azure_openai.yml` file
# Override keys with --set to avoid yaml file changes
pf connection create --file ../../../connections/azure_openai.yml --set api_key=<your_api_key> api_base=<your_api_base>

# check if connection exists
pf connection show -n open_ai_connection
```

- Test using connection secret specified in environment variables
**Note**: we use `'` to wrap the value since it supports raw values without escaping in PowerShell & bash. For Windows Command Prompt, you may need to remove the `'` to prevent it from becoming part of the value.

```bash
# test with default input value in flow.dag.yaml
pf flow test --flow . --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}'
```

- Create run using connection secret binding specified in environment variables, see [run.yml](run.yml)
```bash
# create run
pf run create --flow . --data ./data.jsonl --stream --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}'
# create run using yaml file
pf run create --file run.yml --stream

# show outputs
name=$(pf run list -r 10 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')
pf run show-details --name $name
```

## Run flow in cloud with connection
- Assume we already have a connection named `open_ai_connection` in workspace.
```bash
# set default workspace
az account set -s <your_subscription_id>
az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>
```

- Create run
```bash
# run with environment variable reference connection in azureml workspace
pfazure run create --flow . --data ./data.jsonl --environment-variables AZURE_OPENAI_API_KEY='${open_ai_connection.api_key}' AZURE_OPENAI_API_BASE='${open_ai_connection.api_base}' --stream --runtime demo-mir
# run using yaml file
pfazure run create --file run.yml --stream --runtime demo-mir
```

- List and show run meta
```bash
# list created run
pfazure run list -r 3

# get a sample run name
name=$(pfazure run list -r 100 | jq '.[] | select(.name | contains("basic_variant_0")) | .name'| head -n 1 | tr -d '"')

# show specific run detail
pfazure run show --name $name

# show output
pfazure run show-details --name $name

# visualize run in browser
pfazure run visualize --name $name
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json

# Flow-level input and its default value.
inputs:
  text:
    type: string
    default: Hello World!

# The flow output is taken from the `llm` node.
outputs:
  output:
    type: string
    reference: ${llm.output}

nodes:
  # Renders hello.jinja2 with the flow input.
  - name: hello_prompt
    type: prompt
    source:
      type: code
      path: hello.jinja2
    inputs:
      text: ${inputs.text}

  # Python tool in hello.py; receives the rendered prompt.
  - name: llm
    type: python
    source:
      type: code
      path: hello.py
    inputs:
      prompt: ${hello_prompt.output}
      deployment_name: text-davinci-003
      max_tokens: "120"

environment:
  python_requirements_txt: requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{# Please replace the template with your own prompt. #}
Write a simple {{text}} program that displays the greeting message when executed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import openai

from dotenv import load_dotenv
from promptflow import tool

# The inputs section will change based on the arguments of the tool function, after you save the code
# Adding type to arguments and return value will help the system show the types properly
# Please update the function name/signature per need


def to_bool(value) -> bool:
    """Return True only when ``str(value)`` equals ``"true"``, ignoring case."""
    lowered = str(value).lower()
    return lowered == "true"


@tool
def my_python_tool(
    prompt: str,
    # for AOAI, deployment name is customized by user, not model name.
    deployment_name: str,
    suffix: str = None,
    max_tokens: int = 120,
    temperature: float = 1.0,
    top_p: float = 1.0,
    n: int = 1,
    logprobs: int = None,
    echo: bool = False,
    stop: list = None,
    presence_penalty: float = 0,
    frequency_penalty: float = 0,
    best_of: int = 1,
    # Kept as-is for signature compatibility; this default is never mutated.
    logit_bias: dict = {},
    user: str = "",
    **kwargs,
) -> str:
    """Return a placeholder answer derived from ``prompt``.

    No request is sent to Azure OpenAI: the function only checks that the
    required environment variables are present and then returns a fixed
    string built from ``prompt``. Every parameter other than ``prompt`` is
    accepted but not used in the body.

    Args:
        prompt: Text that is echoed back inside the placeholder answer.

    Returns:
        The string ``"fake answer based on {prompt}"``.

    Raises:
        Exception: If ``AZURE_OPENAI_API_KEY`` or ``AZURE_OPENAI_API_BASE``
            is still missing from the environment after loading ``.env``.
    """
    required_vars = ("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_API_BASE")

    if any(name not in os.environ for name in required_vars):
        # load environment variables from .env file
        load_dotenv()

    # Report every missing variable up front instead of failing later with a
    # bare KeyError on AZURE_OPENAI_API_BASE.
    missing = [name for name in required_vars if name not in os.environ]
    if missing:
        raise Exception(
            "Please specify environment variables: " + ", ".join(missing)
        )

    # return directly to avoid using promptflow connection in azure ml example repository
    return f"fake answer based on {prompt}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
promptflow[azure]
promptflow-tools
python-dotenv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"text": "Python Hello World!"}
{"text": "C Hello World!"}
{"text": "C# Hello World!"}
Loading

0 comments on commit 5bb2997

Please sign in to comment.