diff --git a/forward_demo.py b/forward_demo.py index dd64212ed4c8a3e786bad1eec85f8b5e73d8bb51..9de47db04df4bea05d8b19028dd9e05c6a9c4d32 100644 --- a/forward_demo.py +++ b/forward_demo.py @@ -222,7 +222,7 @@ if __name__ == "__main__": from src.trainer import train_callback, generate_init_weight - args.vocab_size = 20000 + args.vocab_size = 50277 from src.model import RWKV model = RWKV(args) @@ -263,7 +263,7 @@ if __name__ == "__main__": trainer.strategy.config["zero_optimization"]["allgather_bucket_size"] = args.ds_bucket_mb * 1000 * 1000 trainer.strategy.config["zero_optimization"]["reduce_bucket_size"] = args.ds_bucket_mb * 1000 * 1000 - seq = torch.randint(0, 20000, (1, 100)) + seq = torch.randint(0, 50277, (1, 100)) model(seq) import ipdb