未验证 提交 bb2f5d24 编写于 作者: H hutuxian 提交者: GitHub

hash_op support int64 hash_size (#18674)

* hash_op support int64 hash_size
* add corresponding UT
上级 a5d4c2fa
......@@ -52,7 +52,7 @@ class HashOpMaker : public framework::OpProtoAndCheckerMaker {
Execute `num_hash` times xxHash algorithm on all elements on second dimension of input.
)DOC");
AddAttr<int>("num_hash", "").SetDefault(1);
AddAttr<int>("mod_by", "").SetDefault(100000);
AddAttr<int64_t>("mod_by", "").SetDefault(100000);
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.")
.SetDefault(true);
......
......@@ -43,7 +43,7 @@ class HashKernel : public framework::OpKernel<T> {
virtual void Compute(const framework::ExecutionContext& context) const {
auto* out_t = context.Output<framework::LoDTensor>("Out");
auto* in_t = context.Input<framework::LoDTensor>("X");
int mod_by = context.Attr<int>("mod_by");
int64_t mod_by = context.Attr<int64_t>("mod_by");
int num_hash = context.Attr<int>("num_hash");
auto in_dims = in_t->dims();
......@@ -59,7 +59,7 @@ class HashKernel : public framework::OpKernel<T> {
for (int idx = 0; idx < seq_length; ++idx) {
for (int ihash = 0; ihash != num_hash; ++ihash) {
output[idx * num_hash + ihash] =
XXH64(input, sizeof(int) * last_dim, ihash) % mod_by;
XXH64(input, sizeof(T) * last_dim, ihash) % mod_by;
}
input += last_dim;
}
......
......@@ -58,5 +58,49 @@ class TestHashNotLoDOp(TestHashOp):
self.check_output()
class TestHashOp2(TestHashOp):
    """
    Case:
    int64 type input

    Two int64 ids are fed as a (2, 1) array; the second one (2**32 + 1)
    cannot be represented in 32 bits. With num_hash=2 the expected output
    holds two hash values per input row, shaped (2, 2, 1).
    """

    def setUp(self):
        self.op_type = "hash"
        # Fixtures must exist before they are wired into inputs/outputs.
        self.init_test_case()
        self.inputs = dict(X=self.in_seq)
        self.attrs = dict(num_hash=2, mod_by=10000)
        self.outputs = dict(Out=self.out_seq)

    def init_test_case(self):
        """Build the int64 input column and the expected hash values."""
        raw_ids = [1, 2**32 + 1]
        expected_hashes = [1269, 9609, 3868, 7268]
        self.in_seq = np.array(raw_ids).reshape((2, 1)).astype("int64")
        self.out_seq = np.array(expected_hashes).reshape((2, 2, 1))

    def test_check_output(self):
        # Delegates the comparison to the inherited check_output().
        self.check_output()
class TestHashOp3(TestHashOp):
    """
    Case:
    int64 type input
    int64 type mod_by attr

    The mod_by attribute is 2**32, which does not fit in a 32-bit signed
    integer, and some of the expected hash values are larger than 2**31.
    """

    def setUp(self):
        self.op_type = "hash"
        # Fixtures must exist before they are wired into inputs/outputs.
        self.init_test_case()
        self.inputs = dict(X=self.in_seq)
        self.attrs = dict(num_hash=2, mod_by=2**32)
        self.outputs = dict(Out=self.out_seq)

    def init_test_case(self):
        """Build the int64 input column and the expected hash values."""
        raw_ids = [10, 5]
        expected_hashes = [1204014882, 393011615, 3586283837, 2814821595]
        self.in_seq = np.array(raw_ids).reshape((2, 1)).astype("int64")
        self.out_seq = np.array(expected_hashes).reshape((2, 2, 1))

    def test_check_output(self):
        # Delegates the comparison to the inherited check_output().
        self.check_output()
# Run this module's test cases via unittest when it is executed as a script.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册