-
Notifications
You must be signed in to change notification settings - Fork 125
/
Copy pathllama_ppo.sh
executable file
·51 lines (44 loc) · 1.88 KB
/
llama_ppo.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env bash
#
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
ACTOR_MODEL_NAME_OR_PATH="meta-llama/Llama-3.1-8B-Instruct" # actor model path
CRITIC_MODEL_NAME_OR_PATH="../outputs/llama_rm" # critic model path
REWARD_MODEL_NAME_OR_PATH="../outputs/llama_rm" # reward model path
TRAIN_DATASETS="PKU-Alignment/PKU-SafeRLHF-single-dimension" # dataset path
TRAIN_TEMPLATE="PKUSafeRLHF" # rlhf dataset template
TRAIN_SPLIT="train" # split the rlhf dataset
PTX_DATASETS="tatsu-lab/alpaca" # sft dataset path
PTX_TEMPLATE="Llama3" # sft dataset template
PTX_SPLIT="train" # split the sft dataset
OUTPUT_DIR="../output/ppo" # output dir
# For wandb online logging
export WANDB_API_KEY=""
# Source the setup script
source ./setup.sh
# Execute deepspeed command
deepspeed \
--master_port ${MASTER_PORT} \
--module align_anything.trainers.text_to_text.ppo \
--actor_model_name_or_path ${ACTOR_MODEL_NAME_OR_PATH} \
--reward_model_name_or_path ${REWARD_MODEL_NAME_OR_PATH} \
--reward_critic_model_name_or_path ${CRITIC_MODEL_NAME_OR_PATH} \
--train_datasets ${TRAIN_DATASETS} \
--train_split ${TRAIN_SPLIT} \
--train_template ${TRAIN_TEMPLATE} \
--ptx_split ${PTX_SPLIT} \
--ptx_datasets ${PTX_DATASETS} \
--ptx_template ${PTX_TEMPLATE} \
--output_dir ${OUTPUT_DIR}