Unverified commit 5e8b33d7, authored by jm_12138, committed by GitHub

fix a bug of tokenize on win (int32 -> int64) (#2036)

* fix a bug of tokenize on win (int32 -> int64)

* fix a bug of tokenize on win (int32 -> int64)
Parent 4c8b919b
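The underlying issue is NumPy's platform-dependent default integer type: `np.array` over a list of Python ints yields int32 on Windows (where the C long is 32 bits) but int64 on Linux, so the unpinned conversion in the old code produced token tensors with different dtypes per platform. Below is a minimal sketch of the difference; the token ids are illustrative and the int64 `result` buffer is an assumption modelled on the upstream CLIP tokenizer, not something shown in this patch.

import numpy as np
import paddle

context_length = 77                      # matches the signature in the patched hunks
tokens = [49406, 320, 1125, 49407]       # illustrative token ids, not from the patch

# NumPy's default integer dtype follows C long: 64-bit on Linux/macOS,
# 32-bit on Windows, so the unpinned conversion differs per platform.
implicit = paddle.to_tensor(np.array(tokens))                  # int32 on Windows
explicit = paddle.to_tensor(np.array(tokens), dtype='int64')   # int64 everywhere

# Assumption: the result buffer is created as int64, as in the upstream CLIP
# tokenizer; pinning the dtype keeps the assignment (and any downstream
# embedding lookup) consistent across platforms.
result = paddle.zeros([1, context_length], dtype='int64')
result[0, :len(tokens)] = explicit
print(implicit.dtype, explicit.dtype, result.dtype)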
@@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77):
     for i, tokens in enumerate(all_tokens):
         if len(tokens) > context_length:
             raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}")
-        result[i, :len(tokens)] = paddle.Tensor(np.array(tokens))
+        result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64')
     return result
...
@@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77):
     for i, tokens in enumerate(all_tokens):
         if len(tokens) > context_length:
             raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}")
-        result[i, :len(tokens)] = paddle.Tensor(np.array(tokens))
+        result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64')
     return result
...
@@ -62,7 +62,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77):
     for i, tokens in enumerate(all_tokens):
         if len(tokens) > context_length:
             raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}")
-        result[i, :len(tokens)] = paddle.Tensor(np.array(tokens))
+        result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64')
     return result
...
@@ -46,7 +46,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 64):
     for i, tokens in enumerate(all_tokens):
         assert len(tokens) <= context_length
-        result[i, :len(tokens)] = paddle.to_tensor(tokens)
+        result[i, :len(tokens)] = paddle.to_tensor(tokens, dtype='int64')
     return result
...
@@ -49,7 +49,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 64):
     for i, tokens in enumerate(all_tokens):
         assert len(tokens) <= context_length
-        result[i, :len(tokens)] = paddle.to_tensor(tokens)
+        result[i, :len(tokens)] = paddle.to_tensor(tokens, dtype='int64')
     return result
...
@@ -59,7 +59,7 @@ def tokenize(texts: Union[str, List[str]], context_length: int = 77):
     for i, tokens in enumerate(all_tokens):
         if len(tokens) > context_length:
             raise RuntimeError(f"Input {texts[i]} is too long for context length {context_length}")
-        result[i, :len(tokens)] = paddle.Tensor(np.array(tokens))
+        result[i, :len(tokens)] = paddle.to_tensor(np.array(tokens), dtype='int64')
     return result
...