
Commit

ci test augment (#140)
* bug fixed: checkpoint return argument

* change dummy data
yezhengmao1 authored Dec 25, 2023
1 parent 56677eb commit 9e75340
Showing 4 changed files with 77 additions and 74 deletions.
.github/workflows/ci_script.py (15 changes: 9 additions & 6 deletions)
@@ -8,7 +8,6 @@ def inference_llama(base_model_name_or_path, lora_weights_path, prompt, device):
     tokenizer = LlamaTokenizer.from_pretrained(base_model_name_or_path)
     model = LlamaForCausalLM.from_pretrained(
         base_model_name_or_path,
-        load_in_8bit=True,
         torch_dtype=torch.float16,
         device_map=device,
     )
@@ -17,20 +16,24 @@ def inference_llama(base_model_name_or_path, lora_weights_path, prompt, device):


 def inference_chatglm(base_model_name_or_path, lora_weights_path, prompt, device):
-    tokenizer = AutoTokenizer.from_pretrained(base_model_name_or_path, trust_remote_code=True)
-    model = AutoModel.from_pretrained(base_model_name_or_path, trust_remote_code=True).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(
+        base_model_name_or_path, trust_remote_code=True)
+    model = AutoModel.from_pretrained(
+        base_model_name_or_path, trust_remote_code=True).to(device)
     inference(tokenizer, model, device, prompt, lora_weights_path)


 def inference(tokenizer, model, device, prompt, lora_weights_path):
-    model = PeftModel.from_pretrained(model, lora_weights_path, torch_dtype=torch.float16)
+    model = PeftModel.from_pretrained(
+        model, lora_weights_path, torch_dtype=torch.float16)
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     with torch.inference_mode():
         outputs = model.generate(
             input_ids=input_ids,
-            max_new_tokens=100, do_sample=True, top_p=0.9, temperature=0.5
+            max_new_tokens=100, do_sample=True, top_p=1, temperature=1
         )
-    output = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):]
+    output = tokenizer.batch_decode(outputs.detach().cpu(
+    ).numpy(), skip_special_tokens=True)[0][len(prompt):]

     print(f"Prompt:\n{prompt}\n")
     print(f"Generated:\n{output}")
.github/workflows/run_on_dev.yml (4 changes: 2 additions & 2 deletions)
@@ -24,12 +24,12 @@ jobs:
       - name: finetune llama-7b
         run: |
           cd /workspace/multi-lora-fine-tune
-          python mlora.py --base_model /data/llama-7b-hf --config ./config/finetune.json --load_8bit
+          python mlora.py --base_model /data/llama-7b-hf --config ./config/dummy.json --load_8bit
       - name: test inference with lora
         run: |
           cd /workspace/multi-lora-fine-tune
-          python .github/workflows/ci_script.py "llama" "/data/llama-7b-hf" "./lora_0" "Say something."
+          python .github/workflows/ci_script.py "llama" "/data/llama-7b-hf" "./lora_1" "What is m-LoRA?"
   test-vicuna-7b:
     runs-on: self-hosted
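The updated inference step calls the CI script with four positional arguments: the model family ("llama"), the base model path, the LoRA weights directory, and the prompt. The script's entry point is not part of this diff; the sketch below shows one way such a dispatch onto the inference_llama and inference_chatglm helpers above could look, with the device string and error handling as assumptions rather than repository code.

# Hypothetical sketch of a ci_script.py entry point matching the workflow
# invocation above; the real entry point is not shown in this commit.
import sys

if __name__ == "__main__":
    # positional args: <model_type> <base_model_path> <lora_weights_path> <prompt>
    model_type, base_model, lora_weights, prompt = sys.argv[1:5]
    device = "cuda:0"  # assumption; not taken from the diff
    if model_type == "llama":
        inference_llama(base_model, lora_weights, prompt, device)
    elif model_type == "chatglm":
        inference_chatglm(base_model, lora_weights, prompt, device)
    else:
        raise ValueError(f"unsupported model type: {model_type}")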
config/dummy.json (4 changes: 2 additions & 2 deletions)
@@ -15,7 +15,7 @@
     "batch_size": 64,
     "micro_batch_size": 64,
     "test_batch_size": 64,
-    "num_epochs": 100,
+    "num_epochs": 50,
     "r": 8,
     "alpha": 16,
     "dropout": 0.05,
@@ -42,7 +42,7 @@
     "batch_size": 64,
     "micro_batch_size": 64,
     "test_batch_size": 64,
-    "num_epochs": 100,
+    "num_epochs": 50,
     "r": 32,
     "alpha": 64,
     "dropout": 0.05,
[diff for the fourth changed file not shown]
