diff --git a/.gitignore b/.gitignore
index 3e563d1d..83339037 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,13 @@
 __pycache__
 dist
 .venv
+# Byte-compiled / optimized / DLL files
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
 # Log
 *.log
 *.log.*
@@ -33,4 +40,157 @@
 tests/state_of_the_union.txt
 # Build
 build
-!dummy_file
\ No newline at end of file
+!dummy_file
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
\ No newline at end of file
diff --git a/llm_rl/requirements.txt b/llm_rl/requirements.txt
index 17482bd2..840d2f2d 100644
--- a/llm_rl/requirements.txt
+++ b/llm_rl/requirements.txt
@@ -1,11 +1,9 @@
-packaging
-wheel
 torch>=1.13.1
 transformers>=4.31.0,<4.35.0
 datasets>=2.12.0
 accelerate>=0.21.0
 peft>=0.4.0
-trl==0.7.2
+trl>=0.7.2
 gradio>=3.38.0,<4.0.0
 scipy
 sentencepiece
@@ -20,8 +18,3 @@ pydantic
 fastapi
 sse-starlette
 matplotlib
-py-cpuinfo
-deepspeed
-bitsandbytes>=0.39.0
-flash-attn
-wandb
\ No newline at end of file
diff --git a/llm_rl/reward_model.sh b/llm_rl/reward_model.sh
index 873b07a5..3068fb43 100644
--- a/llm_rl/reward_model.sh
+++ b/llm_rl/reward_model.sh
@@ -1,15 +1,16 @@
-CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
+python src/train_bash.py \
     --stage rm \
-    --model_name_or_path meta-llama/Llama-2-13b-hf \
+    --model_name_or_path meta-llama/Llama-2-13b \
     --do_train \
     --dataset comparison_gpt4_en \
     --template default \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
     --resume_lora_training False \
-    --output_dir ./llama-2-13b-rm_cache \
-    --per_device_train_batch_size 8 \
-    --gradient_accumulation_steps 8 \
+    --checkpoint_dir ./llama-2-13b-rm \
+    --output_dir ./llama-2-13b-rm \
+    --per_device_train_batch_size 2 \
+    --gradient_accumulation_steps 4 \
     --lr_scheduler_type cosine \
     --logging_steps 10 \
     --save_steps 1000 \
@@ -17,7 +18,4 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
     --num_train_epochs 1.0 \
     --plot_loss \
     --fp16 \
-    --use_auth_token True \
-    --wandb_token "99caa13ec9552adf0e92e5c30021307ce3cf7fa4" \
-    --hf_auth_token "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" \
-    --deepspeed ./deepspeed_config_s2.json
+    --hf_auth_token "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"
\ No newline at end of file
diff --git a/llm_rl/src/llmtuner/extras/template.py b/llm_rl/src/llmtuner/extras/template.py
index 508c2ce0..401750ce 100644
--- a/llm_rl/src/llmtuner/extras/template.py
+++ b/llm_rl/src/llmtuner/extras/template.py
@@ -499,19 +499,6 @@ def get_template_and_fix_tokenizer(
     sep=[]
 )
 
-register_template(
-    name="llama2-sotopia",
-    prefix=[
-        "{{system}}"
-    ],
-    prompt=[
-        "[INST] {{query}} [/INST]"
-    ],
-    system=(
-        ""
-    ),
-    sep=[]
-)
 
 r"""
 Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
diff --git a/llm_rl/src/llmtuner/hparams/finetuning_args.py b/llm_rl/src/llmtuner/hparams/finetuning_args.py
index 4479ae6b..d5ef323d 100644
--- a/llm_rl/src/llmtuner/hparams/finetuning_args.py
+++ b/llm_rl/src/llmtuner/hparams/finetuning_args.py
@@ -1,4 +1,3 @@
-from typing import List
 import json
 from typing import Literal, Optional
 from dataclasses import asdict, dataclass, field
@@ -52,10 +51,6 @@ class FinetuningArguments:
                     Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
                     LLaMA-2, InternLM, XVERSE choices: the same as LLaMA."}
     )
-    lora_bias: Optional[str] = field(
-        default="none",
-        metadata={"help": "The lora_bias option from bitsandbytes."}
-    )
     additional_target: Optional[str] = field(
         default=None,
         metadata={"help": "Name(s) of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint."}
@@ -88,18 +83,6 @@ class FinetuningArguments:
         default=0,
         metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."}
     )
-    wandb_token: Optional[str] = field(
-        default=None,
-        metadata={"help": "The login api token for wandb."}
-    )
-    wandb_project: Optional[str] = field(
-        default=None,
-        metadata={"help": "The project name for the current wandb log."}
-    )
-    wandb_tags: Optional[List[str]] = field(
-        default=None,
-        metadata={"help": "The tag for the current wandb run."}
-    )
 
     def __post_init__(self):
         if isinstance(self.lora_target, str): # support custom target modules/layers of LoRA
diff --git a/llm_rl/src/llmtuner/hparams/model_args.py b/llm_rl/src/llmtuner/hparams/model_args.py
index 531e37a6..7c25fad1 100644
--- a/llm_rl/src/llmtuner/hparams/model_args.py
+++ b/llm_rl/src/llmtuner/hparams/model_args.py
@@ -1,4 +1,3 @@
-from torch import float16, bfloat16, float32
 from typing import Literal, Optional
 from dataclasses import dataclass, field
 
@@ -75,21 +74,9 @@ class ModelArguments:
         default=None,
         metadata={"help": "Path to the directory to save the exported model."}
     )
-    qlora_compute_dtype: Optional[str] = field(
-        default="fp32",
-        metadata={"help": "The compute_dtype option from bitsandbytes."}
-    )
-    use_custom_callback: Optional[bool] = field(
-        default=False,
-        metadata={"help": "Whether enable custom callbacks."}
-    )
-    call_back_save_epochs: Optional[int] = field(
-        default=1,
-        metadata={"help": "The number of epochs before each call back save."}
-    )
-
 
     def __post_init__(self):
+        self.compute_dtype = None
         self.model_max_length = None
 
         if self.split_special_tokens and self.use_fast_tokenizer:
@@ -104,10 +91,3 @@ def __post_init__(self):
         if self.use_auth_token == True and self.hf_auth_token is not None:
             from huggingface_hub.hf_api import HfFolder # lazy load
             HfFolder.save_token(self.hf_auth_token)
-
-        if self.qlora_compute_dtype == "bf16":
-            self.compute_dtype = bfloat16
-        elif self.qlora_compute_dtype == "fp16":
-            self.compute_dtype = float16
-        else:
-            self.compute_dtype = float32
\ No newline at end of file
diff --git a/llm_rl/src/llmtuner/tuner/core/adapter.py b/llm_rl/src/llmtuner/tuner/core/adapter.py
index 22633d54..4fcc6e62 100644
--- a/llm_rl/src/llmtuner/tuner/core/adapter.py
+++ b/llm_rl/src/llmtuner/tuner/core/adapter.py
@@ -89,7 +89,6 @@ def init_adapter(
                 lora_alpha=finetuning_args.lora_alpha,
                 lora_dropout=finetuning_args.lora_dropout,
                 target_modules=target_modules,
-                bias=finetuning_args.lora_bias,
                 modules_to_save=finetuning_args.additional_target
             )
             model = get_peft_model(model, lora_config)
diff --git a/llm_rl/src/llmtuner/tuner/core/loader.py b/llm_rl/src/llmtuner/tuner/core/loader.py
index 2df9baf1..e77c4945 100644
--- a/llm_rl/src/llmtuner/tuner/core/loader.py
+++ b/llm_rl/src/llmtuner/tuner/core/loader.py
@@ -74,7 +74,6 @@ def load_model_and_tokenizer(
         padding_side="right", # training with left-padded tensors in fp16 precision may cause overflow
         **config_kwargs
     )
-    tokenizer.pad_token = tokenizer.unk_token
 
     if finetuning_args.finetuning_type != "lora" and model_args.checkpoint_dir is not None:
         model_to_load = model_args.checkpoint_dir[0]
diff --git a/llm_rl/src/llmtuner/tuner/core/utils.py b/llm_rl/src/llmtuner/tuner/core/utils.py
index 03043e20..d9a1aac9 100644
--- a/llm_rl/src/llmtuner/tuner/core/utils.py
+++ b/llm_rl/src/llmtuner/tuner/core/utils.py
@@ -9,16 +9,10 @@
     from transformers.modeling_utils import PreTrainedModel
     from llmtuner.hparams import FinetuningArguments
 
-import torch.distributed as dist
-import os
 
 logger = get_logger(__name__)
 
-def is_first_node():
-    world_rank = dist.get_rank() if torch.distributed.is_initialized() else 0
-    local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
-    return world_rank == local_rank == 0
-
+
 def find_all_linear_modules(
     model: "PreTrainedModel",
     quantization_bit: Optional[int] = None,
diff --git a/llm_rl/src/llmtuner/tuner/rm/trainer.py b/llm_rl/src/llmtuner/tuner/rm/trainer.py
index 94549f18..80502937 100644
--- a/llm_rl/src/llmtuner/tuner/rm/trainer.py
+++ b/llm_rl/src/llmtuner/tuner/rm/trainer.py
@@ -38,7 +38,7 @@ def compute_loss(
         See: https://github.com/huggingface/transformers/blob/v4.30.2/src/transformers/trainer.py#L3509
         """
         # Compute rewards
-        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True) # (lm_logits, loss, value)
+        _, _, values = model(**inputs, output_hidden_states=True, return_dict=True)
         if values.size(0) != inputs["input_ids"].size(0): # adapt to chatglm2
             values = torch.transpose(values, 0, 1)
 
diff --git a/llm_rl/src/llmtuner/tuner/sft/workflow.py b/llm_rl/src/llmtuner/tuner/sft/workflow.py
index 332ceff6..8d53605d 100644
--- a/llm_rl/src/llmtuner/tuner/sft/workflow.py
+++ b/llm_rl/src/llmtuner/tuner/sft/workflow.py
@@ -10,8 +10,6 @@
 from llmtuner.tuner.core import load_model_and_tokenizer
 from llmtuner.tuner.sft.metric import ComputeMetrics
 from llmtuner.tuner.sft.trainer import CustomSeq2SeqTrainer
-from llmtuner.tuner.core.utils import is_first_node
-from llmtuner.tuner.sft.custom_callback import SaveModelCallback
 
 if TYPE_CHECKING:
     from transformers import TrainerCallback
@@ -29,11 +27,6 @@ def run_sft(
     dataset = get_dataset(model_args, data_args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args, training_args.do_train, stage="sft")
     dataset = preprocess_dataset(dataset, tokenizer, data_args, training_args, stage="sft")
-
-    if training_args.gradient_checkpointing:
-        model.enable_input_require_grads()
-        model.gradient_checkpointing_enable()
-        model.config.use_cache = False
 
     if training_args.predict_with_generate:
         tokenizer.padding_side = "left" # use left-padding in generation
@@ -51,11 +44,6 @@ def run_sft(
         generation_num_beams=data_args.eval_num_beams or training_args.generation_num_beams
     ))
     training_args = Seq2SeqTrainingArguments(**training_args_dict)
-    if is_first_node():
-        training_args.report_to = ["wandb"]
-
-    if model_args.use_custom_callback:
-        callbacks.append(SaveModelCallback(model_args.call_back_save_epochs, training_args.output_dir))
 
     # Initialize our Trainer
     trainer = CustomSeq2SeqTrainer(
diff --git a/llm_rl/src/llmtuner/tuner/tune.py b/llm_rl/src/llmtuner/tuner/tune.py
index 054a6b1c..4eb7f78f 100644
--- a/llm_rl/src/llmtuner/tuner/tune.py
+++ b/llm_rl/src/llmtuner/tuner/tune.py
@@ -9,9 +9,6 @@
 from llmtuner.tuner.ppo import run_ppo
 from llmtuner.tuner.dpo import run_dpo
 
-from llmtuner.tuner.core.utils import is_first_node
-import wandb
-
 if TYPE_CHECKING:
     from transformers import TrainerCallback
 
@@ -22,10 +19,7 @@
 def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["TrainerCallback"]] = None):
     model_args, data_args, training_args, finetuning_args, generating_args = get_train_args(args)
     callbacks = [LogCallback()] if callbacks is None else callbacks
-    if is_first_node():
-        wandb.login(key=finetuning_args.wandb_token)
-        wandb.init(project=finetuning_args.wandb_project, tags=[*finetuning_args.wandb_tags] if finetuning_args.wandb_tags else None)
-
+
     if finetuning_args.stage == "pt":
         run_pt(model_args, data_args, training_args, finetuning_args, callbacks)
     elif finetuning_args.stage == "sft":