diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b343fc88d7b8d38f08a1cd494bf349c1ec3f047b
--- /dev/null
+++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc
@@ -0,0 +1,117 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/npu_op_runner.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+// NPU kernel for reduce_max. The reduction is run by the "ReduceMaxD" NPU op
+// into a temporary tensor of the input's dtype; when the "out_dtype"
+// attribute asks for a different dtype, the "Cast" NPU op converts that
+// temporary into Out.
+template <typename DeviceContext, typename T>
+class ReduceMaxNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+    auto dims = ctx.Attr<std::vector<int>>("dim");
+    bool keep_dim = ctx.Attr<bool>("keep_dim");
+    bool reduce_all = ctx.Attr<bool>("reduce_all");
+    int out_dtype = ctx.Attr<int>("out_dtype");
+
+    auto place = ctx.GetPlace();
+
+    // Reduction result, kept in the input dtype and shaped like Out.
+    framework::Tensor cast_out(x->type());
+    cast_out.Resize(out->dims());
+    cast_out.mutable_data<T>(place);
+
+    // out_dtype == -1 leaves the output dtype equal to the input dtype.
+    auto cast_out_dtype = x->type();
+    if (out_dtype != -1) {
+      cast_out_dtype = static_cast<framework::proto::VarType::Type>(out_dtype);
+    }
+
+    if (x->type() != cast_out_dtype) {
+      // Allocate Out with the requested dtype; "Cast" below fills it.
+      if (cast_out_dtype == framework::proto::VarType::FP32) {
+        out->mutable_data<float>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::FP16) {
+        out->mutable_data<paddle::platform::float16>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT16) {
+        out->mutable_data<int16_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT32) {
+        out->mutable_data<int32_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::INT64) {
+        out->mutable_data<int64_t>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::FP64) {
+        out->mutable_data<double>(place);
+      } else if (cast_out_dtype == framework::proto::VarType::BOOL) {
+        out->mutable_data<bool>(place);
+      } else {
+        // No branch above allocated Out, yet "Cast" below would still run
+        // on it, so fail explicitly instead.
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "reduce_max on NPU does not support out_dtype %d.", out_dtype));
+      }
+    } else {
+      // No cast needed: Out shares the reduction buffer directly.
+      out->ShareDataWith(cast_out);
+    }
+
+    framework::NPUAttributeMap attr_input = {{"axes", dims},
+                                             {"keep_dims", keep_dim}};
+
+    if (reduce_all) {
+      // Reduce over every axis of X, replacing whatever "dim" held.
+      std::vector<int> dim_vec;
+      dim_vec.reserve(x->dims().size());
+      for (int i = 0; i < x->dims().size(); i++) {
+        dim_vec.push_back(i);
+      }
+
+      attr_input = {{"axes", dim_vec}, {"keep_dims", keep_dim}};
+    }
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+
+    const auto& runner =
+        NpuOpRunner("ReduceMaxD", {*x}, {cast_out}, attr_input);
+    runner.Run(stream);
+
+    if (x->type() != cast_out_dtype) {
+      auto dst_dtype = ConvertToNpuDtype(cast_out_dtype);
+      const auto& runner_cast =
+          NpuOpRunner("Cast", {cast_out}, {*out},
+                      {{"dst_type", static_cast<int>(dst_dtype)}});
+      runner_cast.Run(stream);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+REGISTER_OP_NPU_KERNEL(
+    reduce_max, ops::ReduceMaxNPUKernel<plat::NPUDeviceContext, float>,
+    ops::ReduceMaxNPUKernel<plat::NPUDeviceContext, plat::float16>);
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 368fedececf53336edc7b67f932408d74994d760..1c36cebe70a77ebe0547bede3bcf6e35bec86ffe 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -532,9 +532,11 @@ class ReduceOp : public framework::OperatorWithKernel {
 #endif
 
     if (input_data_type == framework::proto::VarType::FP16) {
-      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
+      PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()) ||
+                            platform::is_npu_place(ctx.GetPlace()),
+                        true,
                         platform::errors::InvalidArgument(
-                            "float16 can only be used on GPU place"));
+                            "float16 can only be used on GPU or NPU place"));
     }
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5d078ced2876757f3258f8a6aea5cf4bd0155da
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_max_op_npu.py
@@ -0,0 +1,280 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.fluid import compiler, Program, program_guard
+from paddle.fluid.framework import convert_np_dtype_to_dtype_
+
+paddle.enable_static()
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestNPUReduceMaxOp(OpTest):
+    """Base case: reduce_max over the last axis of a float32 tensor on NPU."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [-1]}
+        self.outputs = {
+            'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim']))
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpMultiAxises(TestNPUReduceMaxOp):
+    """reduce_max over the last two axes (dim=[-2, -1])."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'dim': [-2, -1]}
+        self.outputs = {
+            'Out': self.inputs['X'].max(axis=tuple(self.attrs['dim']))
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceAll(TestNPUReduceMaxOp):
+    """reduce_max over every axis, requested through reduce_all=True."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {'reduce_all': True}
+        self.outputs = {'Out': self.inputs['X'].max()}
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_bool(TestNPUReduceMaxOp):
+    """float32 input with out_dtype BOOL; the expected result is cast to bool."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.BOOL)
+        }
+        self.outputs = {
+            'Out':
+            self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype(bool)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_int16(TestNPUReduceMaxOp):
+    """float32 input with out_dtype INT16; the expected result is cast to int16."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.INT16)
+        }
+        # np.random.random draws from [0, 1), so after the integer cast the
+        # expected output is all zeros; this checks the Cast path rather than
+        # the reduced values.
+        self.outputs = {
+            'Out':
+            self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype(np.int16)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_int32(TestNPUReduceMaxOp):
+    """float32 input with out_dtype INT32; the expected result is cast to int32."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.INT32)
+        }
+        self.outputs = {
+            'Out':
+            self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype(np.int32)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_int64(TestNPUReduceMaxOp):
+    """float32 input with out_dtype INT64; the expected result is cast to int64."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.INT64)
+        }
+        self.outputs = {
+            'Out':
+            self.inputs['X'].max(axis=tuple(self.attrs['dim'])).astype(np.int64)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_fp16(TestNPUReduceMaxOp):
+    """float32 input with out_dtype FP16; output is checked with atol=1e-3."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.FP16)
+        }
+        # The reference is rounded to float16; test_check_output below
+        # compares against it with atol=1e-3 instead of the default
+        # tolerance.
+        self.outputs = {
+            'Out': self.inputs['X'].max(
+                axis=tuple(self.attrs['dim'])).astype(np.float16)
+        }
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place, atol=1e-3)
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_fp32(TestNPUReduceMaxOp):
+    """float32 input with out_dtype FP32, i.e. the same dtype as the input."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.FP32)
+        }
+        self.outputs = {
+            'Out': self.inputs['X'].max(
+                axis=tuple(self.attrs['dim'])).astype(np.float32)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_fp64(TestNPUReduceMaxOp):
+    """float32 input with out_dtype FP64; the expected result is cast to float64."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.FP64)
+        }
+        self.outputs = {
+            'Out': self.inputs['X'].max(
+                axis=tuple(self.attrs['dim'])).astype(np.float64)
+        }
+
+
+@skip_check_grad_ci(
+    reason="reduce_max is discontinuous non-derivable function,"
+    " its gradient check is not supported by unittest framework.")
+class TestReduceMaxOpWithOutDtype_fp32_2(TestNPUReduceMaxOp):
+    """float16 input (see init_dtype) with out_dtype FP32."""
+
+    def setUp(self):
+        self.op_type = "reduce_max"
+        self.set_npu()
+        self.init_dtype()
+
+        self.inputs = {'X': np.random.random((5, 6, 10)).astype(self.dtype)}
+        self.attrs = {
+            'dim': [-2, -1],
+            'out_dtype': int(core.VarDesc.VarType.FP32)
+        }
+        self.outputs = {
+            'Out': self.inputs['X'].max(
+                axis=tuple(self.attrs['dim'])).astype(np.float32)
+        }
+
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+if __name__ == '__main__':
+    unittest.main()