[MLU]add reduce op mlu kernel (#41028)

d1c1d731 · zn · GitHub · 649948a6 · d1c1d731 · d1c1d731
15 changed files
--- a/paddle/fluid/operators/collective/c_reduce_max_op_mlu.cc
+++ b/paddle/fluid/operators/collective/c_reduce_max_op_mlu.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_reduce_op.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+// MLU kernels of c_reduce_max: one kRedMax instantiation per listed type.
+REGISTER_OP_MLU_KERNEL(c_reduce_max,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, float>,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, plat::float16>,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, int>,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, int16_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, int8_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedMax, uint8_t>)
--- a/paddle/fluid/operators/collective/c_reduce_min_op_mlu.cc
+++ b/paddle/fluid/operators/collective/c_reduce_min_op_mlu.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_reduce_op.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+// MLU kernels of c_reduce_min: one kRedMin instantiation per listed type.
+REGISTER_OP_MLU_KERNEL(c_reduce_min,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, float>,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, plat::float16>,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, int>,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, int16_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, int8_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedMin, uint8_t>)
--- a/paddle/fluid/operators/collective/c_reduce_op.h
+++ b/paddle/fluid/operators/collective/c_reduce_op.h
@@ -47,6 +47,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/npu/hccl_helper.h"
 #endif

+#if defined(PADDLE_WITH_CNCL)
+#include "paddle/fluid/platform/device/mlu/cncl_helper.h"
+#endif
+
 namespace paddle {
 namespace operators {

@@ -331,6 +335,68 @@ class CReduceOpCUDAKernel : public framework::OpKernel<T> {
  }
 };

+// MLU kernel shared by the c_reduce_* ops. Compute() hands "X" and "Out" to a
+// single cnclReduce call whose reduction operator is selected by red_type.
+template <ReduceType red_type, typename T>
+class CReduceOpMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+#if defined(PADDLE_WITH_CNCL)
+    auto src = ctx.Input<framework::LoDTensor>("X");
+    auto dst = ctx.Output<framework::LoDTensor>("Out");
+    auto place = ctx.GetPlace();
+    cnclDataType_t cncl_dtype = platform::ToCNCLDataType(
+        framework::TransToProtoVarType(src->dtype()));
+    int64_t count = src->numel();
+    // "Out" is resized to the dims of "X" before its buffer is requested.
+    const void* send_ptr = src->data();
+    dst->Resize(src->dims());
+    void* recv_ptr = dst->mutable_data<T>(place);
+
+    int ring_id = ctx.Attr<int>("ring_id");
+    int root_rank = ctx.Attr<int>("root_id");
+    auto comm = platform::CNCLCommContext::Instance().Get(ring_id, place);
+
+    // use_calc_stream selects the device context's stream; otherwise the
+    // stream held by the communicator is used.
+    mluStream queue = nullptr;
+    if (!ctx.Attr<bool>("use_calc_stream")) {
+      queue = comm->stream();
+    } else {
+      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      queue = static_cast<platform::MLUDeviceContext*>(dev_ctx)->stream();
+    }
+
+    // Translate the compile-time ReduceType into the matching CNCL operator.
+    cnclReduceOp_t cncl_op = cnclSum;
+    switch (red_type) {
+      case kRedSum:
+        cncl_op = cnclSum;
+        break;
+      case kRedMax:
+        cncl_op = cnclMax;
+        break;
+      case kRedMin:
+        cncl_op = cnclMin;
+        break;
+      case kRedProd:
+        cncl_op = cnclProd;
+        break;
+      default:
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Invalid reduce type: %d", red_type));
+    }
+
+    PADDLE_ENFORCE_MLU_SUCCESS(cnclReduce(send_ptr, recv_ptr, count,
+                                          cncl_dtype, cncl_op, root_rank,
+                                          comm->comm(), queue));
+#else
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "PaddlePaddle should compile with MLU."));
+#endif
+  }
+};
+
 class CReduceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() {

--- a/paddle/fluid/operators/collective/c_reduce_prod_op_mlu.cc
+++ b/paddle/fluid/operators/collective/c_reduce_prod_op_mlu.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_reduce_op.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+// MLU kernels of c_reduce_prod: one kRedProd instantiation per listed type.
+REGISTER_OP_MLU_KERNEL(c_reduce_prod,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, float>,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, plat::float16>,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, int>,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, int16_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, int8_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedProd, uint8_t>)
--- a/paddle/fluid/operators/collective/c_reduce_sum_op_mlu.cc
+++ b/paddle/fluid/operators/collective/c_reduce_sum_op_mlu.cc
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_reduce_op.h"
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+// MLU kernels of c_reduce_sum: one kRedSum instantiation per listed type.
+REGISTER_OP_MLU_KERNEL(c_reduce_sum,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, float>,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, plat::float16>,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, int>,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, int16_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, int8_t>,
+                       ops::CReduceOpMLUKernel<ops::kRedSum, uint8_t>)
--- a/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/mlu/CMakeLists.txt
@@ -26,6 +26,10 @@ if (WITH_MLU)
        set_tests_properties(test_collective_allreduce_min PROPERTIES TIMEOUT 120)
 	set_tests_properties(test_collective_allreduce_prod PROPERTIES TIMEOUT 120)
 	set_tests_properties(test_collective_allgather PROPERTIES TIMEOUT 120)
+	set_tests_properties(test_collective_reduce_sum PROPERTIES TIMEOUT 120)
+        set_tests_properties(test_collective_reduce_max PROPERTIES TIMEOUT 120)
+        set_tests_properties(test_collective_reduce_min PROPERTIES TIMEOUT 120)
+        set_tests_properties(test_collective_reduce_prod PROPERTIES TIMEOUT 120)
        set_tests_properties(test_collective_broadcast_api_mlu PROPERTIES TIMEOUT 120)
        set_tests_properties(test_collective_allreduce_api_mlu PROPERTIES TIMEOUT 120)
 	set_tests_properties(test_collective_allgather_api_mlu PROPERTIES TIMEOUT 120)

--- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py
+++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_api.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import os
+import sys
+import signal
+import time
+import socket
+from contextlib import closing
+from six import string_types
+import math
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
+import paddle.fluid.unique_name as nameGen
+from paddle.fluid import core
+import unittest
+from multiprocessing import Process
+import paddle.fluid.layers as layers
+from functools import reduce
+from test_collective_api_base_mlu import TestCollectiveAPIRunnerBase, runtime_main
+
+paddle.enable_static()
+
+
+class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase):
+    def __init__(self):
+        self.global_ring_id = 0
+
+    def get_model(self, main_prog, startup_program, rank):
+        with fluid.program_guard(main_prog, startup_program):
+            tindata = layers.data(
+                name="tindata", shape=[10, 1000], dtype='float32')
+            paddle.distributed.reduce(tindata, dst=0)
+            return [tindata]
+
+
+if __name__ == "__main__":  # entry point when run as a script
+    runtime_main(TestCollectiveReduceAPI, "reduce")
--- a/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py
+++ b/python/paddle/fluid/tests/unittests/mlu/collective_reduce_op.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import numpy as np
+import argparse
+import os
+import sys
+import signal
+import time
+import socket
+from contextlib import closing
+from six import string_types
+import math
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.profiler as profiler
+import paddle.fluid.unique_name as nameGen
+from paddle.fluid import core
+import unittest
+from multiprocessing import Process
+import paddle.fluid.layers as layers
+from functools import reduce
+from test_collective_base_mlu import TestCollectiveRunnerBase, runtime_main
+
+paddle.enable_static()
+
+
+class TestCollectiveReduce(TestCollectiveRunnerBase):
+    def __init__(self):
+        self.global_ring_id = 0
+
+    def get_model(self, main_prog, startup_program, col_type):
+        ring_id = 0
+        rootid = 1
+        with fluid.program_guard(main_prog, startup_program):
+            tindata = layers.data(
+                name="tindata", shape=[10, 1000], dtype='float32')
+            toutdata = main_prog.current_block().create_var(
+                name="outof" + col_type,
+                dtype='float32',
+                type=core.VarDesc.VarType.LOD_TENSOR,
+                persistable=False,
+                stop_gradient=False)
+            main_prog.global_block().append_op(
+                type="c_" + col_type,
+                inputs={'X': tindata},
+                attrs={'ring_id': ring_id,
+                       'root_id': rootid},
+                outputs={'Out': toutdata})
+            main_prog.global_block().append_op(
+                type="c_sync_comm_stream",
+                inputs={'X': toutdata},
+                outputs={'Out': toutdata},
+                attrs={'ring_id': ring_id})
+            return toutdata
+
+
+if __name__ == "__main__":  # entry point when run as a script
+    runtime_main(TestCollectiveReduce)
--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_api_base_mlu.py
@@ -219,6 +219,9 @@ class TestDistBase(unittest.TestCase):
            self.assertTrue(
                np.allclose(
                    tr1_out, need_result, rtol=1e-05, atol=1e-05))
+        elif col_type == "reduce":
+            need_result = input1 + input2
+            self.assertTrue(np.allclose(tr0_out, need_result))
        elif col_type == "allgather":
            need_result = np.vstack((input1, input2))
            tr_out0 = np.vstack((tr0_out[0], tr0_out[1]))

--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_base_mlu.py
@@ -295,6 +295,18 @@ class TestDistBase(unittest.TestCase):
            self.assertTrue(
                np.allclose(
                    tr1_out, need_result, rtol=1e-05, atol=1e-05))
+        elif col_type == "reduce_sum":
+            need_result = input1 + input2
+            self.assertTrue(np.allclose(tr1_out, need_result))
+        elif col_type == "reduce_prod":
+            need_result = input1 * input2
+            self.assertTrue(np.allclose(tr1_out, need_result))
+        elif col_type == "reduce_max":
+            need_result = np.maximum(input1, input2)
+            self.assertTrue(np.allclose(tr1_out, need_result))
+        elif col_type == "reduce_min":
+            need_result = np.minimum(input1, input2)
+            self.assertTrue(np.allclose(tr1_out, need_result))
        elif col_type == "allgather":
            need_result = np.vstack((input1, input2))
            self.assertTrue(np.allclose(tr0_out, need_result))

--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_api_mlu.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+import numpy as np
+import paddle
+
+from test_collective_api_base_mlu import TestDistBase
+
+paddle.enable_static()
+
+
+class TestCollectiveReduceAPI(TestDistBase):
+    def _setup_config(self):
+        pass
+
+    def test_reduce_cncl_fp16(self):
+        self.check_with_place("collective_reduce_api.py", "reduce", "float16")
+
+    def test_reduce_cncl_fp32(self):
+        self.check_with_place("collective_reduce_api.py", "reduce", "float32")
+
+    def test_reduce_cncl_int32(self):
+        self.check_with_place("collective_reduce_api.py", "reduce", "int32")
+
+
+if __name__ == '__main__':  # run this file's tests when executed directly
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_max.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import unittest
+import numpy as np
+import paddle
+
+from test_collective_base_mlu import TestDistBase
+
+paddle.enable_static()
+
+
+class TestCReduceOp(TestDistBase):
+    def _setup_config(self):
+        pass
+
+    def test_reduce_max_fp32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max",
+                              "float32")
+
+    def test_reduce_max_fp16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max",
+                              "float16")
+
+    def test_reduce_max_int32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max", "int32")
+
+    def test_reduce_max_int16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max", "int16")
+
+    def test_reduce_max_int8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max", "int8")
+
+    def test_reduce_max_uint8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_max", "uint8")
+
+
+if __name__ == '__main__':  # run this file's tests when executed directly
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_min.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import unittest
+import numpy as np
+import paddle
+
+from test_collective_base_mlu import TestDistBase
+
+paddle.enable_static()
+
+
+class TestCReduceOp(TestDistBase):
+    def _setup_config(self):
+        pass
+
+    def test_reduce_min_fp32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min",
+                              "float32")
+
+    def test_reduce_min_fp16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min",
+                              "float16")
+
+    def test_reduce_min_int32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min", "int32")
+
+    def test_reduce_min_int16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min", "int16")
+
+    def test_reduce_min_int8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min", "int8")
+
+    def test_reduce_min_uint8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_min", "uint8")
+
+
+if __name__ == '__main__':  # run this file's tests when executed directly
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_prod.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import unittest
+import numpy as np
+import paddle
+
+from test_collective_base_mlu import TestDistBase
+
+paddle.enable_static()
+
+
+class TestCReduceOp(TestDistBase):
+    def _setup_config(self):
+        pass
+
+    def test_reduce_prod_fp32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod",
+                              "float32")
+
+    def test_reduce_prod_fp16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod",
+                              "float16")
+
+    def test_reduce_prod_int32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod", "int32")
+
+    def test_reduce_prod_int16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod", "int16")
+
+    def test_reduce_prod_int8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod", "int8")
+
+    def test_reduce_prod_uint8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_prod", "uint8")
+
+
+if __name__ == '__main__':  # run this file's tests when executed directly
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py
+++ b/python/paddle/fluid/tests/unittests/mlu/test_collective_reduce_sum.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import sys
+import unittest
+import numpy as np
+import paddle
+
+from test_collective_base_mlu import TestDistBase
+
+paddle.enable_static()
+
+
+class TestCReduceOp(TestDistBase):
+    def _setup_config(self):
+        pass
+
+    def test_reduce_sum_fp32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum",
+                              "float32")
+
+    def test_reduce_sum_fp16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum",
+                              "float16")
+
+    def test_reduce_sum_int32(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum", "int32")
+
+    def test_reduce_sum_int16(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum", "int16")
+
+    def test_reduce_sum_int8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum", "int8")
+
+    def test_reduce_sum_uint8(self):
+        self.check_with_place("collective_reduce_op.py", "reduce_sum", "uint8")
+
+
+if __name__ == '__main__':  # run this file's tests when executed directly
+    unittest.main()