diff --git a/README.md b/README.md index 244ecf6b38c39f2c49719ecdf761a81deb5d7aa3..94f50daddf79d995c98dfff3498efeff585c78f8 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ python train_rm.py --load_model "./out_sft/rwkv-190.pth" --wandb "" --proj_dir ### PPO Model (Reinforcement learning from Human Feedback) ``` -python train_rm.py --load_sft_model "./out_sft/rwkv-190.pth" --load_rm_model "./out_rm/rm-2.pth" --wandb "" \ +python train_ppo.py --load_sft_model "./out_sft/rwkv-190.pth" --load_rm_model "./out_rm/rm-2.pth" --wandb "" \ --proj_dir "out_rlhf" \ --data_file "data/rm_mock_data.csv" --data_type "utf-8" --vocab_size 50277 \ --ctx_len 1024 --epoch_steps 200 --epoch_count 1000 --epoch_begin 0 --epoch_save 2 \