提交 82d6d979 编写于 作者: U u010280923

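Bug fix: rename the `--load_model` argument to `--load_sft_model` for PPO training (argument parser and README example)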

上级 a2aabac7
......@@ -77,7 +77,7 @@ python train_rm.py --load_model "./out_sft/rwkv-190.pth" --wandb "" --proj_dir
### PPO Model (Reinforcement learning from Human Feedback)
```
-python train_ppo.py --load_model "./out_sft/rwkv-190.pth" --load_rm_model "./out_rm/rm-2.pth" --wandb "" \
+python train_ppo.py --load_sft_model "./out_sft/rwkv-190.pth" --load_rm_model "./out_rm/rm-2.pth" --wandb "" \
--proj_dir "out_rlhf" \
--data_file "data/rm_mock_data.csv" --data_type "utf-8" --vocab_size 50277 \
--ctx_len 1024 --epoch_steps 200 --epoch_count 1000 --epoch_begin 0 --epoch_save 2 \
......
......@@ -57,7 +57,7 @@ if __name__ == "__main__":
parser = ArgumentParser()
-parser.add_argument("--load_model", default="", type=str) # full path, with .pth
+parser.add_argument("--load_sft_model", default="", type=str) # full path, with .pth
parser.add_argument("--load_rm_model", default="", type=str) # full path, with .pth
parser.add_argument("--wandb", default="", type=str) # wandb project name. if "" then don't use wandb
parser.add_argument("--proj_dir", default="out", type=str)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册