diff --git a/llm_ft/playground/test_embedding/README.md b/llm_ft/playground/test_embedding/README.md
deleted file mode 100644
index 57ac73c5..00000000
--- a/llm_ft/playground/test_embedding/README.md
+++ /dev/null
@@ -1,15 +0,0 @@
-## Machine Learning with Embeddings
-You can use embeddings to
-- Evaluate text similarity, see [test_sentence_similarity.py](test_sentence_similarity.py)
-- Build your own classifier, see [test_classification.py](test_classification.py)
-- Search relative texts, see [test_semantic_search.py](test_semantic_search.py)
-
-To these tests, you need to download the data [here](https://www.kaggle.com/datasets/snap/amazon-fine-food-reviews). You also need an OpenAI API key for comparison.
-
-Run with:
-```bash
-cd playground/test_embedding
-python3 test_classification.py
-```
-
-The script will train classifiers based on `vicuna-7b`, `text-similarity-ada-001` and `text-embedding-ada-002` and report the accuracy of each classifier.
diff --git a/llm_ft/playground/test_embedding/test_classification.py b/llm_ft/playground/test_embedding/test_classification.py
deleted file mode 100644
index 393827bb..00000000
--- a/llm_ft/playground/test_embedding/test_classification.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import json
-import os
-
-import numpy as np
-import openai
-import pandas as pd
-import requests
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import classification_report, accuracy_score
-
-
-np.set_printoptions(threshold=10000)
-
-
-def get_embedding_from_api(word, model="vicuna-7b-v1.1"):
-    if "ada" in model:
-        resp = openai.Embedding.create(
-            model=model,
-            input=word,
-        )
-        embedding = np.array(resp["data"][0]["embedding"])
-        return embedding
-
-    url = "http://localhost:8000/v1/embeddings"
-    headers = {"Content-Type": "application/json"}
-    data = json.dumps({"model": model, "input": word})
-
-    response = requests.post(url, headers=headers, data=data)
-    if response.status_code == 200:
-        embedding = np.array(response.json()["data"][0]["embedding"])
-        return embedding
-    else:
-        print(f"Error: {response.status_code} - {response.text}")
-        return None
-
-
-def create_embedding_data_frame(data_path, model, max_tokens=500):
-    df = pd.read_csv(data_path, index_col=0)
-    df = df[["Time", "ProductId", "UserId", "Score", "Summary", "Text"]]
-    df = df.dropna()
-    df["combined"] = (
-        "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
-    )
-    top_n = 1000
-    df = df.sort_values("Time").tail(top_n * 2)
-    df.drop("Time", axis=1, inplace=True)
-
-    df["n_tokens"] = df.combined.apply(lambda x: len(x))
-    df = df[df.n_tokens <= max_tokens].tail(top_n)
-    df["embedding"] = df.combined.apply(lambda x: get_embedding_from_api(x, model))
-    return df
-
-
-def train_random_forest(df):
-    X_train, X_test, y_train, y_test = train_test_split(
-        list(df.embedding.values), df.Score, test_size=0.2, random_state=42
-    )
-
-    clf = RandomForestClassifier(n_estimators=100)
-    clf.fit(X_train, y_train)
-    preds = clf.predict(X_test)
-
-    report = classification_report(y_test, preds)
-    accuracy = accuracy_score(y_test, preds)
-    return clf, accuracy, report
-
-
-input_datapath = "amazon_fine_food_review.csv"
-if not os.path.exists(input_datapath):
-    raise Exception(
-        f"Please download data from: https://www.kaggle.com/datasets/snap/amazon-fine-food-reviews"
-    )
-
-df = create_embedding_data_frame(input_datapath, "vicuna-7b-v1.1")
-clf, accuracy, report = train_random_forest(df)
-print(f"Vicuna-7b-v1.1 accuracy:{accuracy}")
-df = create_embedding_data_frame(input_datapath, "text-similarity-ada-001")
-clf, accuracy, report = train_random_forest(df)
-print(f"text-similarity-ada-001 accuracy:{accuracy}")
-df = create_embedding_data_frame(input_datapath, "text-embedding-ada-002")
-clf, accuracy, report = train_random_forest(df)
-print(f"text-embedding-ada-002 accuracy:{accuracy}")
diff --git a/llm_ft/playground/test_embedding/test_semantic_search.py b/llm_ft/playground/test_embedding/test_semantic_search.py
deleted file mode 100644
index 879b240b..00000000
--- a/llm_ft/playground/test_embedding/test_semantic_search.py
+++ /dev/null
@@ -1,99 +0,0 @@
-import json
-import os
-
-import numpy as np
-import openai
-import pandas as pd
-import requests
-from scipy.spatial.distance import cosine
-
-
-def cosine_similarity(vec1, vec2):
-    try:
-        return 1 - cosine(vec1, vec2)
-    except:
-        print(vec1.shape, vec2.shape)
-
-
-def get_embedding_from_api(word, model="vicuna-7b-v1.1"):
-    if "ada" in model:
-        resp = openai.Embedding.create(
-            model=model,
-            input=word,
-        )
-        embedding = np.array(resp["data"][0]["embedding"])
-        return embedding
-
-    url = "http://localhost:8000/v1/embeddings"
-    headers = {"Content-Type": "application/json"}
-    data = json.dumps({"model": model, "input": word})
-
-    response = requests.post(url, headers=headers, data=data)
-    if response.status_code == 200:
-        embedding = np.array(response.json()["data"][0]["embedding"])
-        return embedding
-    else:
-        print(f"Error: {response.status_code} - {response.text}")
-        return None
-
-
-def create_embedding_data_frame(data_path, model, max_tokens=500):
-    df = pd.read_csv(data_path, index_col=0)
-    df = df[["Time", "ProductId", "UserId", "Score", "Summary", "Text"]]
-    df = df.dropna()
-    df["combined"] = (
-        "Title: " + df.Summary.str.strip() + "; Content: " + df.Text.str.strip()
-    )
-    top_n = 1000
-    df = df.sort_values("Time").tail(top_n * 2)
-    df.drop("Time", axis=1, inplace=True)
-
-    df["n_tokens"] = df.combined.apply(lambda x: len(x))
-    df = df[df.n_tokens <= max_tokens].tail(top_n)
-    df["embedding"] = df.combined.apply(lambda x: get_embedding_from_api(x, model))
-    return df
-
-
-def search_reviews(df, product_description, n=3, pprint=False, model="vicuna-7b-v1.1"):
-    product_embedding = get_embedding_from_api(product_description, model=model)
-    df["similarity"] = df.embedding.apply(
-        lambda x: cosine_similarity(x, product_embedding)
-    )
-
-    results = (
-        df.sort_values("similarity", ascending=False)
-        .head(n)
-        .combined.str.replace("Title: ", "")
-        .str.replace("; Content:", ": ")
-    )
-    if pprint:
-        for r in results:
-            print(r[:200])
-            print()
-    return results
-
-
-def print_model_search(input_path, model):
-    print(f"Model: {model}")
-    df = create_embedding_data_frame(input_path, model)
-    print("search: delicious beans")
-    results = search_reviews(df, "delicious beans", n=5, model=model)
-    print(results)
-    print("search: whole wheat pasta")
-    results = search_reviews(df, "whole wheat pasta", n=5, model=model)
-    print(results)
-    print("search: bad delivery")
-    results = search_reviews(df, "bad delivery", n=5, model=model)
-    print(results)
-
-
-input_datapath = "amazon_fine_food_review.csv"
-if not os.path.exists(input_datapath):
-    raise Exception(
-        f"Please download data from: https://www.kaggle.com/datasets/snap/amazon-fine-food-reviews"
-    )
-
-
-print_model_search(input_datapath, "vicuna-7b-v1.1")
-print_model_search(input_datapath, "text-similarity-ada-001")
-print_model_search(input_datapath, "text-embedding-ada-002")
diff --git a/llm_ft/playground/test_embedding/test_sentence_similarity.py b/llm_ft/playground/test_embedding/test_sentence_similarity.py
deleted file mode 100644
index 0b9a5408..00000000
--- a/llm_ft/playground/test_embedding/test_sentence_similarity.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import json
-import os
-
-import numpy as np
-import openai
-import requests
-from scipy.spatial.distance import cosine
-
-
-def get_embedding_from_api(word, model="vicuna-7b-v1.1"):
-    if "ada" in model:
-        resp = openai.Embedding.create(
-            model=model,
-            input=word,
-        )
-        embedding = np.array(resp["data"][0]["embedding"])
-        return embedding
-
-    url = "http://localhost:8000/v1/embeddings"
-    headers = {"Content-Type": "application/json"}
-    data = json.dumps({"model": model, "input": word})
-
-    response = requests.post(url, headers=headers, data=data)
-    if response.status_code == 200:
-        embedding = np.array(response.json()["data"][0]["embedding"])
-        return embedding
-    else:
-        print(f"Error: {response.status_code} - {response.text}")
-        return None
-
-
-def cosine_similarity(vec1, vec2):
-    return 1 - cosine(vec1, vec2)
-
-
-def print_cosine_similarity(embeddings, texts):
-    for i in range(len(texts)):
-        for j in range(i + 1, len(texts)):
-            sim = cosine_similarity(embeddings[texts[i]], embeddings[texts[j]])
-            print(f"Cosine similarity between '{texts[i]}' and '{texts[j]}': {sim:.2f}")
-
-
-texts = [
-    "The quick brown fox",
-    "The quick brown dog",
-    "The fast brown fox",
-    "A completely different sentence",
-]
-
-embeddings = {}
-for text in texts:
-    embeddings[text] = get_embedding_from_api(text)
-
-print("Vicuna-7B:")
-print_cosine_similarity(embeddings, texts)
-
-for text in texts:
-    embeddings[text] = get_embedding_from_api(text, model="text-similarity-ada-001")
-
-print("text-similarity-ada-001:")
-print_cosine_similarity(embeddings, texts)
-
-for text in texts:
-    embeddings[text] = get_embedding_from_api(text, model="text-embedding-ada-002")
-
-print("text-embedding-ada-002:")
-print_cosine_similarity(embeddings, texts)
diff --git a/llm_ft/pyproject.toml b/llm_ft/pyproject.toml
index b7109f05..1e6cdf58 100644
--- a/llm_ft/pyproject.toml
+++ b/llm_ft/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
 
 [project.optional-dependencies]
 model_worker = ["accelerate>=0.21", "peft", "sentencepiece", "torch", "transformers>=4.31.0", "protobuf"]
 webui = ["gradio"]
-train = ["einops", "flash-attn>=2.0", "wandb"]
+train = ["einops", "flash-attn>=2.0", "wandb", "deepspeed", "peft", "bitsandbytes", "scipy", "sentencepiece"]
 llm_judge = ["openai", "anthropic>=0.3", "ray"]
 dev = ["black==23.3.0", "pylint==2.8.2"]
diff --git a/llm_ft/qlora.sh b/llm_ft/qlora.sh
new file mode 100644
index 00000000..ddd9a752
--- /dev/null
+++ b/llm_ft/qlora.sh
@@ -0,0 +1,25 @@
+deepspeed fastchat/train/train_lora.py \
+    --model_name_or_path ./vicuna-7b-1.5 \
+    --lora_r 8 \
+    --lora_alpha 16 \
+    --lora_dropout 0.05 \
+    --data_path ./data/dummy_conversation.json \
+    --bf16 True \
+    --output_dir ./checkpoints \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 1 \
+    --evaluation_strategy "no" \
+    --save_strategy "steps" \
+    --save_steps 1200 \
+    --save_total_limit 100 \
+    --learning_rate 2e-5 \
+    --weight_decay 0. \
+    --warmup_ratio 0.03 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --tf32 True \
+    --model_max_length 2048 \
+    --q_lora True \
+    --deepspeed playground/deepspeed_config_s2.json
\ No newline at end of file
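The expanded `train` extras and the new `qlora.sh` are meant to be used together. A minimal launch sequence might look like the sketch below; the extras group, script name, and flags come from the diff above, while the repo-root working directory and single-node GPU setup are assumptions.

```bash
# Hypothetical usage sketch (not part of the diff). Assumes the
# ./vicuna-7b-1.5 weights and playground/deepspeed_config_s2.json
# referenced by qlora.sh are already in place.
cd llm_ft

# Install the expanded train extras, which now pull in deepspeed, peft,
# bitsandbytes, scipy, and sentencepiece per the pyproject.toml change.
pip install -e ".[train]"

# Launch QLoRA fine-tuning via the new script, which wraps
# `deepspeed fastchat/train/train_lora.py ... --q_lora True`.
bash qlora.sh
```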