-
Notifications
You must be signed in to change notification settings - Fork 0
/
.project-metadata.yaml
152 lines (139 loc) · 5.49 KB
/
.project-metadata.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Project metadata for the Fine Tuning Studio AMP.
name: Fine Tuning Studio
# Literal block scalar: a single line of text followed by one trailing newline.
description: |
  This AMP demonstrates how PEFT and other fine-tuning optimization techniques can be used for efficient and effective customization of an existing LLM to perform new tasks.
author: Cloudera Inc.
# Both versions are plain (unquoted) scalars, so a YAML loader reads them as
# numbers rather than strings.
specification_version: 1.0
prototype_version: 1.0
# Quoted so the value stays a string instead of being resolved as a date.
date: '2023-07-22'
# Project-level environment variables. Each entry provides a default value and
# a description; descriptions use folded scalars (`>-`), so the wrapped lines
# below are joined with single spaces and carry no trailing newline.
environment_variables:
  FINE_TUNING_STUDIO_SQLITE_DB:
    default: '.app/state.db'
    description: >-
      This is the location of the FTS app's SQLite database.
  FINE_TUNING_STUDIO_PROJECT_DEFAULTS:
    default: 'data/project_defaults.json'
    description: >-
      Project defaults for the app that are populated into the Studio when
      the AMP is initially deployed.
  CUSTOM_LORA_ADAPTERS_DIR:
    default: 'data/adapters/'
    description: >-
      The directory containing the reproduced LoRA adapters created by the
      fine-tuning jobs in this project. Also the location to look for any
      custom LoRA adapters.
  HUGGINGFACE_ACCESS_TOKEN:
    # Empty string by default; no token is shipped with the project.
    default: ''
    description: >-
      In order to access Huggingface gated models, please create a
      Huggingface Access Token. Log in to Huggingface -> Settings -> Access
      Tokens.
# Runtime selection for this project: a single entry naming the editor,
# kernel and edition.
runtimes:
- editor: 'PBJ Workbench'
  kernel: 'Python 3.9'
  edition: 'Nvidia GPU'
tasks:
# Disabled task (commented out): a session that would run
# bin/check_gpu_resources.py. Nested keys are indented inside the comment so
# the entry is valid YAML if it is ever uncommented.
# - type: run_session
#   name: Validate GPU Availability in this workspace
#   script: bin/check_gpu_resources.py
#   short_summary: Check for GPU availability.
#   long_summary: Check GPUs are enabled on this workspace and are currently schedulable.
#   kernel: python3
#   cpu: 2
#   memory: 4
# Session task that runs the dependency installation script.
- type: run_session
  name: Install Dependencies
  short_summary: Install Dependencies
  script: bin/install-dependencies.py
  kernel: python3
  # Resources requested for the session.
  cpu: 2
  memory: 8
# - type: run_session
#   name: Validate GPU CUDA Capability
#   script: bin/check_gpu_capability.py
#   short_summary: Check for GPU capability.
#   long_summary: Check GPU device supports the CUDA capabilities required.
#   kernel: python3
#   cpu: 2
#   memory: 4
#   gpu: 1
# Job-creation task for the fine-tuning template job. Per its long_summary,
# this job is the template used to create and launch fine-tuning tasks in the
# application.
- type: create_job
  name: Accel_Finetuning_Base_Job
  entity_label: accel_fine_tune_job_template
  short_summary: Create Template Job for creating finetuning tasks
  long_summary: >-
    Create Template Job for creating accelerator-based finetuning tasks.
    This job is used as the template for creating and launching fine-tuning
    tasks in the application.
  script: ft/scripts/accel_fine_tune_base_script.py
  # NOTE(review): YAML has no `None` literal, so this value is the string
  # "None", not null. It is quoted to make that explicit without changing the
  # value; confirm whether an empty or omitted `arguments` was intended.
  arguments: 'None'
  cpu: 2
  memory: 8
  gpu: 1
  environment:
    TASK_TYPE: CREATE/RUN_JOB
    # Environment variable values are strings. Quoted so the consumer gets
    # the text "true" rather than a YAML boolean whose string form depends on
    # the loader.
    MLFLOW_FLATTEN_PARAMS: 'true'
    # Quoted because the value itself contains a colon.
    PYTORCH_CUDA_ALLOC_CONF: 'expandable_segments:True'
# Job-creation task for the MLflow evaluation template job. Per its
# long_summary, this job is the template used to create and launch mlflow
# evaluation tasks in the application.
- type: create_job
  name: Mlflow_Evaluation_Base_Job
  entity_label: mlflow_evaluation_job_template
  short_summary: Create Template Job for creating mlflow evaluation tasks
  long_summary: >-
    Create Template Job for creating mlflow evaluation tasks. This job is
    used as the template for creating and launching mlflow evaluation tasks
    in the application.
  script: ft/scripts/mlflow_evaluation_base_script.py
  # NOTE(review): this is the string "None" (YAML has no `None` literal);
  # quoted to make that explicit. Confirm whether an empty value was intended.
  arguments: 'None'
  cpu: 2
  memory: 8
  gpu: 1
  environment:
    TASK_TYPE: CREATE/RUN_JOB
# Session task that runs the project-defaults initialization script.
- type: run_session
  name: Initialize Project Defaults
  short_summary: Initialize Project Defaults
  long_summary: >-
    Initialize default datasets, prompts, models, adapters, etc., shipped
    with this version of the Studio.
  script: bin/initialize-project-defaults.py
  kernel: python3
  cpu: 2
  memory: 8
# Creates the sample ticketing model entry. The entity_label
# `ticket_agent_adapter_model` is the same label used by the build_model and
# deploy_model tasks in this file.
- type: create_model
  name: Ticketing Agent Model
  entity_label: ticket_agent_adapter_model
  short_summary: Create the sample support ticket intent model api endpoint
  description: This sample model API endpoint is used to generate support ticket intent
  # Name of the environment variable associated with the model's access key.
  access_key_environment_variable: TICKETING_MODEL_ACCESS_KEY
  default_resources:
    cpu: 2
    memory: 4
    gpu: 1
  default_replication_policy:
    type: fixed
    num_replicas: 1
# Builds the sample ticketing model from the given file and entry-point
# function.
- type: build_model
  name: Build Sample Ticketing Agent Adapter Model
  entity_label: ticket_agent_adapter_model
  short_summary: Build Sample Ticketing Agent Adapter Model
  comment: Build Sample Ticketing Agent Adapter Model
  target_file_path: examples/ticketing-agent-app/mistral-cml-model.py
  target_function_name: api_wrapper
  # One example request/response pair for the model.
  examples:
  - request:
      # NOTE(review): inside double quotes `\\n` is an escaped backslash
      # followed by `n`, so the prompt contains literal backslash-n text
      # rather than newlines. Confirm this is intended.
      prompt: "You are an event ticketing customer LLM chatbot responsible for generating a one-word, snake_case action, based on a customer input. Please provide the action below based on the input from the customer.\\n\\n### CUSTOMER: I want to cancel my concert tickets\\n### ACTION:"
    response: ''
# Deploys the sample ticketing model identified by the shared entity_label.
- type: deploy_model
  name: ticket_agent_adapter_model
  entity_label: ticket_agent_adapter_model
  short_summary: Deploy Sample Ticketing Agent Adapter Model
  # Resources requested for the deployment.
  cpu: 2
  memory: 4
  gpu: 1
# Starts the sample ticketing application via its launcher script.
- type: start_application
  name: Sample-Ticketing-Agent-Application
  subdomain: sample-ticketing-agent
  short_summary: Start Sample Ticketing Agent Application
  long_summary: >-
    This application launcher launches the sample ticketing streamlit
    application.
  script: examples/ticketing-agent-app/ticketing-agent-launch.py
  cpu: 2
  memory: 8
  environment_variables:
    TASK_TYPE: START_APPLICATION
# Starts the Fine Tuning Studio application itself. Per its long_summary it
# needs an available GPU, which is requested below.
- type: start_application
  name: Fine Tuning Studio
  subdomain: fine-tuning-studio
  short_summary: Start Fine Tuning Studio
  long_summary: >-
    This application requires an available GPU to run the LLM model and
    LoRA adapters.
  script: bin/run-app.py
  cpu: 2
  memory: 8
  gpu: 1
  environment_variables:
    TASK_TYPE: START_APPLICATION