From 887ca941694692c1b1ad8be448fc7c5310586b59 Mon Sep 17 00:00:00 2001
From: u010280923
Date: Mon, 13 Mar 2023 15:33:41 +0800
Subject: [PATCH] reward model finished

---
 README.md        |  2 +-
 forward_demo.py  | 84 ------------------------------------------------
 train_rm_demo.py | 62 -----------------------------------
 3 files changed, 1 insertion(+), 147 deletions(-)
 delete mode 100644 forward_demo.py
 delete mode 100644 train_rm_demo.py

diff --git a/README.md b/README.md
index 63361e9..381a7d7 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ python train_sft.py --load_model "rwkv-190.pth" --wandb "" --proj_dir "out_sft"
 ```
 python train_rm.py --load_model "rwkv-190.pth" --wandb "" --proj_dir "out_rm" \
 --data_file "data/rm_mock_data.csv" --data_type "utf-8" --vocab_size 50277 \
---ctx_len 2048 --epoch_steps 200 --epoch_count 1000 --epoch_begin 0 --epoch_save 2 \
+--ctx_len 1024 --epoch_steps 200 --epoch_count 1000 --epoch_begin 0 --epoch_save 2 \
 --micro_bsz 2 --n_layer 24 --n_embd 2048 --pre_ffn 0 --head_qk 0 \
 --lr_init 1e-5 --lr_final 1e-5 --warmup_steps 0 --beta1 0.9 --beta2 0.999 --adam_eps 1e-8 \
 --accelerator gpu --devices 1 --precision bf16 --strategy deepspeed_stage_2_offload --grad_cp 1 \
diff --git a/forward_demo.py b/forward_demo.py
deleted file mode 100644
index 58920ec..0000000
--- a/forward_demo.py
+++ /dev/null
@@ -1,84 +0,0 @@
-import os, sys, torch
-import numpy as np
-np.set_printoptions(precision=4, suppress=True, linewidth=200)
-
-# current_path = os.path.dirname(os.path.abspath(__file__))
-# sys.path.append(f'{current_path}/rwkv_pip_package/src')
-
-# Tune these below (test True/False for all of them) to find the fastest setting:
-# torch._C._jit_set_profiling_executor(True)
-# torch._C._jit_set_profiling_mode(True)
-# torch._C._jit_override_can_fuse_on_cpu(True)
-# torch._C._jit_override_can_fuse_on_gpu(True)
-# torch._C._jit_set_texpr_fuser_enabled(False)
-# torch._C._jit_set_nvfuser_enabled(False)
-
-########################################################################################################
-#
-# Use '/' in model path, instead of '\'. Use ctx4096 models if you need long ctx.
-#
-# fp16 = good for GPU (!!! DOES NOT support CPU !!!)
-# fp32 = good for CPU
-# bf16 = worse accuracy, supports CPU
-# xxxi8 (example: fp16i8) = xxx with int8 quantization to save 50% VRAM/RAM, slower, slightly less accuracy
-#
-# Read https://pypi.org/project/rwkv/ for Strategy Guide
-#
-########################################################################################################
-# set these before import RWKV
-os.environ['RWKV_JIT_ON'] = '1'
-os.environ["RWKV_CUDA_ON"] = '0' # if '1' then compile CUDA kernel for seq mode (much faster)
-
-# from rwkv.model import RWKV # pip install rwkv
-from src.rlhf.rwkv.model import RWKV
-# model = RWKV(model='./model/rwkv-190.pth', strategy='cpu fp32')
-model = RWKV(model='./model/RWKV-4-Pile-169M-20220807-8023.pth', strategy='cpu fp32')
-
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16i8')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32 *3 -> cuda fp16 *6+')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cpu fp32')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16 *8 -> cpu fp32')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda:0 fp16 -> cuda:1 fp16 -> cpu fp32 *1')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16 *6+')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-14b/RWKV-4-Pile-14B-20230213-8019', strategy='cuda fp16 *0+ -> cpu fp32 *1')
-# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-3b/RWKV-4-Pile-3B-20221110-ctx4096', strategy='cuda:0 fp16 *25 -> cuda:1 fp16')
-
-out, state, token_embed = model.forward([187, 510, 1563, 310, 247], None)
-print(out.detach().cpu().numpy()) # get logits
-# out, state = model.forward([187, 510], None)
-# out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
-# out, state = model.forward([310, 247], state)
-# print(out.detach().cpu().numpy()) # same result as above
-
-import ipdb
-ipdb.set_trace()
-
-# print('\n')
-
-# from src.rlhf.rwkv.utils import PIPELINE, PIPELINE_ARGS
-# pipeline = PIPELINE(model, "20B_tokenizer.json")
-
-# ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
-# print(ctx, end='')
-
-# def my_print(s):
-#     print(s, end='', flush=True)
-
-# # For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
-# # https://platform.openai.com/docs/api-reference/parameter-details
-
-# args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
-#                      alpha_frequency = 0.25,
-#                      alpha_presence = 0.25,
-#                      token_ban = [0], # ban the generation of some tokens
-#                      token_stop = []) # stop generation whenever you see any token here
-
-# ########################################################################################################
-# # 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
-# # 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
-# pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
-
-# print('\n')
\ No newline at end of file
diff --git a/train_rm_demo.py b/train_rm_demo.py
deleted file mode 100644
index 3b2a21e..0000000
--- a/train_rm_demo.py
+++ /dev/null
@@ -1,62 +0,0 @@
-'''
-@File : train_rm_demo.py
-@Time : 2023/03/10 00:54:57
-@Author : Lu Xin
-@Contact : luxin@csdn.net
-'''
-
-# here put the import lib
-
-import torch
-
-from tqdm import tqdm
-
-from src.rlhf.reward import RewardModel
-from src.rlhf.rwkv.model import RWKV
-
-def loss_function(prefer_reward, alter_reward):
-    return -torch.mean(torch.log(torch.sigmoid(alter_reward - prefer_reward)))
-
-model = "./model/RWKV-4-Pile-169M-20220807-8023.pth"
-strategy = "cpu fp32"
-rwkv_model = RWKV(model, strategy)
-
-reward_model = RewardModel(
-    rwkv_model
-)
-
-import ipdb
-ipdb.set_trace()
-
-# as used in the InstructGPT paper
-optimizer = torch.optim.Adam(
-    reward_model.parameters(), lr=1e-5, betas=(0.9, 0.95))
-
-# mock data
-dim = 20000
-prompt = torch.randint(0, dim, (1, 50))
-prefer_response = torch.randint(0, dim, (1, 50))
-alter_response = torch.randint(0, dim, (1, 50))
-
-prefer_pair = torch.concat((prompt, prefer_response), dim=1)
-alter_pair = torch.concat((prompt, alter_response), dim=1)
-
-prompt_mask = torch.cat((torch.ones(1, 50).bool(), torch.zeros(1, 50).bool()), dim=1)
-
-for epoch in range(100):
-    # compute the rewards
-    prefer_reward = reward_model(prefer_pair, prompt_mask = prompt_mask)
-    alter_reward = reward_model(alter_pair, prompt_mask = prompt_mask)
-    # print(f"prefer_reward: {prefer_reward}")
-    # print(f"alter_reward: {alter_reward}")
-
-    # train
-    loss = loss_function(prefer_reward, alter_reward)
-    print(f"loss: {loss}")
-
-    # Backward pass
-    loss.backward()
-    optimizer.step()
-
-    # Zero the gradients
-    optimizer.zero_grad()
--
GitLab
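
Reviewer note on the deleted `forward_demo.py`: the repo's patched `src.rlhf.rwkv.model.RWKV` returns `(out, state, token_embed)`, whereas the upstream `rwkv` pip package returns `(out, state)`. A minimal sketch of the incremental, state-carrying evaluation the demo's comments describe, using the upstream package (the model path and token ids are the demo's own examples; treat the rest as an assumption about the pip API, not this repo's):

```python
# Incremental RWKV inference with an explicit recurrent state, using the
# upstream `rwkv` pip package (pip install rwkv). Model path is illustrative.
import os
from copy import deepcopy

os.environ['RWKV_JIT_ON'] = '1'   # must be set before importing RWKV
os.environ['RWKV_CUDA_ON'] = '0'  # '1' compiles a CUDA kernel for faster seq mode

from rwkv.model import RWKV

model = RWKV(model='./model/RWKV-4-Pile-169M-20220807-8023.pth', strategy='cpu fp32')

# One forward pass over the whole token sequence...
out_full, state = model.forward([187, 510, 1563, 310, 247], None)

# ...matches feeding the same tokens in chunks while threading the state through.
out, state = model.forward([187, 510], None)
checkpoint = deepcopy(state)               # clone the state before branching/reusing it
out, state = model.forward([1563], state)
out, state = model.forward([310, 247], state)
# `out` now equals `out_full` up to numerical precision.
```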
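Reviewer note on the deleted `train_rm_demo.py`: its `loss_function` minimizes `-log sigmoid(alter_reward - prefer_reward)`, which drives the *alternative* response's reward above the preferred one; the InstructGPT pairwise loss is `-log sigmoid(r_preferred - r_rejected)`. Below is a self-contained sketch with the arguments in the InstructGPT order. `ToyRewardModel` is a hypothetical stand-in so the snippet runs on its own; it is not the repo's `RewardModel` API:

```python
# InstructGPT-style pairwise reward-model training, reduced to plain PyTorch.
# `ToyRewardModel` is a hypothetical stand-in for the repo's RewardModel.
import torch
import torch.nn as nn

class ToyRewardModel(nn.Module):
    """Maps a token sequence to a single scalar reward (illustrative only)."""
    def __init__(self, vocab_size: int = 20000, d_model: int = 64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        self.head = nn.Linear(d_model, 1)

    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        h = self.embed(tokens).mean(dim=1)   # crude mean-pool over the sequence
        return self.head(h).squeeze(-1)      # (batch,) scalar rewards

def pairwise_loss(prefer_reward: torch.Tensor, alter_reward: torch.Tensor) -> torch.Tensor:
    # -log sigmoid(r_preferred - r_rejected): the preferred reward comes first,
    # so minimizing the loss pushes it above the rejected one.
    return -torch.mean(torch.log(torch.sigmoid(prefer_reward - alter_reward)))

reward_model = ToyRewardModel()
optimizer = torch.optim.Adam(reward_model.parameters(), lr=1e-5, betas=(0.9, 0.95))

# Mock preference pair: shared prompt, preferred vs. alternative response.
prompt = torch.randint(0, 20000, (1, 50))
prefer_pair = torch.cat((prompt, torch.randint(0, 20000, (1, 50))), dim=1)
alter_pair = torch.cat((prompt, torch.randint(0, 20000, (1, 50))), dim=1)

for step in range(100):
    loss = pairwise_loss(reward_model(prefer_pair), reward_model(alter_pair))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
```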