Commit 4b0ff7c7 authored by u010280923

debug

Parent 580a99ad
data
model
.DS_Store
.idea
\ No newline at end of file
@@ -28,12 +28,15 @@ np.set_printoptions(precision=4, suppress=True, linewidth=200)
# set these before import RWKV
os.environ['RWKV_JIT_ON'] = '1'
os.environ["RWKV_CUDA_ON"] = '0' # if '1' then compile CUDA kernel for seq mode (much faster)
os.environ["RWKV_T_MAX"] = '1024'
from src.model import RWKV # pip install rwkv
# from rwkv.model import RWKV # pip install rwkv
from src.rlhf.rwkv import RWKV
# model = RWKV(model='./model/rwkv-190.pth', strategy='cpu fp32')
model = RWKV(model='./model/RWKV-4-Pile-169M-20220807-8023.pth', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cuda fp16i8')
model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32 *3 -> cuda fp16 *6+')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cpu fp32')
# model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-1b5/RWKV-4-Pile-1B5-20220903-8040', strategy='cuda fp16')
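# Hedged reading of the strategy strings above (check the rwkv package docs to
# confirm): 'cpu fp32' / 'cuda fp16' pick one device and precision for every
# layer, while 'cpu fp32 *3 -> cuda fp16 *6+' splits the model, keeping the
# first 3 layers on CPU in fp32 and moving later layers to CUDA in fp16, with
# '+' streaming the remaining layers on demand.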
@@ -45,34 +48,37 @@ model = RWKV(model='/fsx/BlinkDL/HF-MODEL/rwkv-4-pile-169m/RWKV-4-Pile-169M-20220807-8023', strategy='cpu fp32')
out, state = model.forward([187, 510, 1563, 310, 247], None)
print(out.detach().cpu().numpy()) # get logits
out, state = model.forward([187, 510], None)
out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
out, state = model.forward([310, 247], state)
print(out.detach().cpu().numpy()) # same result as above
# out, state = model.forward([187, 510], None)
# out, state = model.forward([1563], state) # RNN has state (use deepcopy to clone states)
# out, state = model.forward([310, 247], state)
# print(out.detach().cpu().numpy()) # same result as above
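# A minimal sketch (not in the original demo) of the deepcopy advice above:
# clone the state before branching, since forward() advances it in place.
import copy
_, cached_state = model.forward([187, 510, 1563], None)
out_a, _ = model.forward([310, 247], copy.deepcopy(cached_state))  # continuation A
out_b, _ = model.forward([247, 310], copy.deepcopy(cached_state))  # continuation B from the same prefix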
print('\n')
import ipdb
ipdb.set_trace()
from src.utils import PIPELINE, PIPELINE_ARGS
pipeline = PIPELINE(model, "20B_tokenizer.json")
# print('\n')
ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
print(ctx, end='')
# from src.utils import PIPELINE, PIPELINE_ARGS
# pipeline = PIPELINE(model, "20B_tokenizer.json")
def my_print(s):
print(s, end='', flush=True)
# ctx = "\nIn a shocking finding, scientist discovered a herd of dragons living in a remote, previously unexplored valley, in Tibet. Even more surprising to the researchers was the fact that the dragons spoke perfect Chinese."
# print(ctx, end='')
# For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
# https://platform.openai.com/docs/api-reference/parameter-details
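# A minimal sketch of the penalty math described above (my reading of the
# OpenAI parameter docs; PIPELINE's own implementation may differ):
from collections import Counter
def apply_repetition_penalties(logits, generated_tokens, alpha_presence, alpha_frequency):
    # presence: flat penalty once a token has appeared at all;
    # frequency: grows with how often the token has appeared
    for tok, n in Counter(generated_tokens).items():
        logits[tok] -= alpha_presence + alpha_frequency * n
    return logits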
# def my_print(s):
# print(s, end='', flush=True)
args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
alpha_frequency = 0.25,
alpha_presence = 0.25,
token_ban = [0], # ban the generation of some tokens
token_stop = []) # stop generation whenever you see any token here
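# A minimal sketch of what temperature and top_p mean here (standard nucleus
# sampling; not necessarily PIPELINE's exact code). Assumes `logits` is a 1-D
# numpy array, e.g. out.detach().cpu().numpy() from above.
def sample_top_p(logits, temperature=1.0, top_p=0.7):
    scaled = logits / temperature
    probs = np.exp(scaled - np.max(scaled))         # numerically stable softmax
    probs /= probs.sum()
    order = np.argsort(-probs)                      # tokens by descending probability
    keep = order[np.cumsum(probs[order]) <= top_p]  # smallest set covering top_p mass
    if keep.size == 0:
        keep = order[:1]                            # always keep the single best token
    p = probs[keep] / probs[keep].sum()
    return int(np.random.choice(keep, p=p))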
# # For alpha_frequency and alpha_presence, see "Frequency and presence penalties":
# # https://platform.openai.com/docs/api-reference/parameter-details
########################################################################################################
# 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
# 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
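# Hedged variant (an assumption, not part of the original demo): the same
# callback hook can collect the generated text instead of printing it.
chunks = []
pipeline.generate(ctx, token_count=50, args=args, callback=chunks.append)
print(''.join(chunks))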
# args = PIPELINE_ARGS(temperature = 1.0, top_p = 0.7,
# alpha_frequency = 0.25,
# alpha_presence = 0.25,
# token_ban = [0], # ban the generation of some tokens
# token_stop = []) # stop generation whenever you see any token here
# ########################################################################################################
# # 1. set os.environ["RWKV_CUDA_ON"] = '1' if possible, for faster preprocess of a long ctx.
# # 2. Reuse the state (use deepcopy to clone it) when you are running the same ctx multiple times.
# pipeline.generate(ctx, token_count=200, args=args, callback=my_print)
print('\n')
\ No newline at end of file
# print('\n')
\ No newline at end of file
This diff is collapsed.