config.ini
[LoraConfig]
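; LoRA adapter hyperparameters; presumably passed through to peft.LoraConfig
; (alpha scaling, dropout, rank r, bias handling, and the causal-LM task type).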
lora_alpha=16
lora_dropout=0.1
r=64
bias=none
task_type=CAUSAL_LM
[TrainingArguments]
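; Trainer settings; these keys match transformers.TrainingArguments fields
; (bf16/tf32 mixed precision, paged 32-bit AdamW, constant LR schedule).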
num_train_epochs=3
per_device_train_batch_size=4
gradient_accumulation_steps=2
gradient_checkpointing=True
optim=paged_adamw_32bit
logging_steps=10
save_strategy=epoch
learning_rate=2e-4
bf16=True
tf32=True
max_grad_norm=0.3
warmup_ratio=0.03
lr_scheduler_type=constant
output_dir=${Paths:output_dir}
[Models]
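; Base model to fine-tune, the instruction dataset, and the sentence-embedding
; model (with its maximum input length in tokens).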
model=meta-llama/Llama-2-7b-hf
dataset=databricks/databricks-dolly-15k
embeddings_model=thenlper/gte-small
embeddings_max_length=512
; Max tokens for the embedding model. This code is designed for the gte-*
; series, e.g. https://huggingface.co/thenlper/gte-small, but could in
; theory be generalized to work with other embedding models.
[Paths]
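; ${...} references presumably rely on configparser's extended interpolation:
; ${base_dir} resolves within this section, ${Models:model} pulls from [Models].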
base_dir=/work3/s212722/herd
dataset_dir=${base_dir}/datasets
cache_dir=${base_dir}/cache
output_dir=${base_dir}/${Models:model}
experts_dir=experts/
experts_file=experts.json