提交 4b0ff7c7 编写于 作者: U u010280923

debug

上级 580a99ad
data data
model
.DS_Store .DS_Store
.idea .idea
\ No newline at end of file
...@@ -28,12 +28,15 @@ np.set_printoptions(precision=4, suppress=True, linewidth=200) ...@@ -28,12 +28,15 @@ np.set_printoptions(precision=4, suppress=True, linewidth=200)
# set these before import RWKV # set these before import RWKV
os.environ['RWKV_JIT_ON'] = '1' os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then compile CUDA kernel for seq mode (much faster) os.environ["RWKV_CUDA_ON"] = '0' # if '1' then compile CUDA kernel for seq mode (much faster)
os.environ["RWKV_T_MAX"] = '1024'
from src.model import RWKV # pip install rwkv # from rwkv.model import RWKV # pip install rwkv
from src.rlhf.rwkv import RWKV
# model = RWKV(model='./model/rwkv-190.pth', strategy='cpu fp32')
model = RWKV(model='./model/RWKV-4-Pile-169M-20220807-8023.pth', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16i8') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16i8')
model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32 *3 -> cuda fp16 *6+') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32 *3 -> cuda fp16 *6+')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cpu fp32') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16') # model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16')
...@@ -45,34 +48,37 @@ model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-2022 ...@@ -45,34 +48,37 @@ model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-2022
out, state = model.forward([187, 510, 1563, 310, 247], None) out, state = model.forward([187, 510, 1563, 310, 247], None)
print(out.detach().cpu().numpy()) # get logits print(out.detach().cpu().numpy()) # get logits
out, state = model.forward([187, 510], None) # out, state = model.forward([187, 510], None)
out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states) # out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
out, state = model.forward([310, 247], state) # out, state = model.forward([310, 247], state)
print(out.detach().cpu().numpy()) # same result as above # print(out.detach().cpu().numpy()) # same result as above
print('\n') import ipdb
ipdb.set_trace()
from src.utils import PIPELINE, PIPELINE_ARGS # print('\n')
pipeline = PIPELINE(model, "20B_tokenizer.json")
ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese." # from src.utils import PIPELINE, PIPELINE_ARGS
print(ctx, end='') # pipeline = PIPELINE(model, "20B_tokenizer.json")
def my_print(s): # ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
print(s, end='', flush=True) # print(ctx, end='')
# For alpha_frequency and alpha_presence, see "Frequency and presence penalties": # def my_print(s):
# https://platform.openai.com/docs/api-reference/parameter-details # print(s, end='', flush=True)
args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7, # # For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
alpha_frequency = 0.25, # # https://platform.openai.com/docs/api-reference/parameter-details
alpha_presence = 0.25,
token_ban = [0], # ban the generation of some tokens
token_stop = []) # stop generation whenever you see any token here
######################################################################################################## # args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
# 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx. # alpha_frequency = 0.25,
# 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times. # alpha_presence = 0.25,
pipeline.generate(ctx, token_count=200, args=args, callback=my_print) # token_ban = [0], # ban the generation of some tokens
# token_stop = []) # stop generation whenever you see any token here
# ########################################################################################################
# # 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
# # 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
# pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
print('\n') # print('\n')
\ No newline at end of file \ No newline at end of file
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册