Commit 4b0ff7c7 authored by u010280923

debug

Parent 580a99ad
data
model
.DS_Store
.idea
\ No newline at end of file
@@ -28,12 +28,15 @@ np.set_printoptions(precision=4, suppress=True, linewidth=200)
# set these before import RWKV
os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then compile CUDA kernel for seq mode (much faster)
os.environ["RWKV_T_MAX"] = '1024'
from src.model import RWKV # pip install rwkv
# from rwkv.model import RWKV # pip install rwkv
from src.rlhf.rwkv import RWKV
# model = RWKV(model='./model/rwkv-190.pth', strategy='cpu fp32')
model = RWKV(model='./model/RWKV-4-Pile-169M-20220807-8023.pth', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16i8')
model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32 *3 -> cuda fp16 *6+')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16')
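# Hedged reading of the strategy strings above (check the rwkv package docs to
# confirm): 'cpu fp32' / 'cuda fp16' pick one device and precision for every
# layer, while 'cpu fp32 *3 -> cuda fp16 *6+' splits the model, keeping the
# first 3 layers on CPU in fp32 and moving later layers to CUDA in fp16, with
# '+' streaming the remaining layers on demand.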
@@ -45,34 +48,37 @@ model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
out, state = model.forward([187, 510, 1563, 310, 247], None)
print(out.detach().cpu().numpy()) # get logits
out, state = model.forward([187, 510], None)
out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
out, state = model.forward([310, 247], state)
print(out.detach().cpu().numpy()) # same result as above
# out, state = model.forward([187, 510], None)
# out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
# out, state = model.forward([310, 247], state)
# print(out.detach().cpu().numpy()) # same result as above
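# A minimal sketch (not in the original demo) of the deepcopy advice above:
# clone the state before branching, since forward() advances it in place.
import copy
_, cached_state = model.forward([187, 510, 1563], None)
out_a, _ = model.forward([310, 247], copy.deepcopy(cached_state))  # continuation A
out_b, _ = model.forward([247, 310], copy.deepcopy(cached_state))  # continuation B from the same prefix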
print('\n')
import ipdb
ipdb.set_trace()
from src.utils import PIPELINE, PIPELINE_ARGS
pipeline = PIPELINE(model, "20B_tokenizer.json")
# print('\n')
ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
print(ctx, end='')
# from src.utils import PIPELINE, PIPELINE_ARGS
# pipeline = PIPELINE(model, "20B_tokenizer.json")
def my_print(s):
print(s, end='', flush=True)
# ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
# print(ctx, end='')
# For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
# https://platform.openai.com/docs/api-reference/parameter-details
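# A minimal sketch of the penalty math described above (my reading of the
# OpenAI parameter docs; PIPELINE's own implementation may differ):
from collections import Counter
def apply_repetition_penalties(logits, generated_tokens, alpha_presence, alpha_frequency):
    # presence: flat penalty once a token has appeared at all;
    # frequency: grows with how often the token has appeared
    for tok, n in Counter(generated_tokens).items():
        logits[tok] -= alpha_presence + alpha_frequency * n
    return logits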
# def my_print(s):
# print(s, end='', flush=True)
args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
alpha_frequency = 0.25,
alpha_presence = 0.25,
token_ban = [0], # ban the generation of some tokens
token_stop = []) # stop generation whenever you see any token here
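# A minimal sketch of what temperature and top_p mean here (standard nucleus
# sampling; not necessarily PIPELINE's exact code). Assumes `logits` is a 1-D
# numpy array, e.g. out.detach().cpu().numpy() from above.
def sample_top_p(logits, temperature=1.0, top_p=0.7):
    scaled = logits / temperature
    probs = np.exp(scaled - np.max(scaled))         # numerically stable softmax
    probs /= probs.sum()
    order = np.argsort(-probs)                      # tokens by descending probability
    keep = order[np.cumsum(probs[order]) <= top_p]  # smallest set covering top_p mass
    if keep.size == 0:
        keep = order[:1]                            # always keep the single best token
    p = probs[keep] / probs[keep].sum()
    return int(np.random.choice(keep, p=p))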
# # For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
# # https://platform.openai.com/docs/api-reference/parameter-details
########################################################################################################
# 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
# 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
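# Hedged variant (an assumption, not part of the original demo): the same
# callback hook can collect the generated text instead of printing it.
chunks = []
pipeline.generate(ctx, token_count=50, args=args, callback=chunks.append)
print(''.join(chunks))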
# args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
# alpha_frequency = 0.25,
# alpha_presence = 0.25,
# token_ban = [0], # ban the generation of some tokens
# token_stop = []) # stop generation whenever you see any token here
# ########################################################################################################
# # 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
# # 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
# pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
print('\n')
\ No newline at end of file
# print('\n')
\ No newline at end of file
This diff is collapsed.