diff --git a/paddle/fluid/operators/clip_op_npu.cc b/paddle/fluid/operators/clip_op_npu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ab35182b417190ac393f501d1993e43a0543a931
--- /dev/null
+++ b/paddle/fluid/operators/clip_op_npu.cc
@@ -0,0 +1,113 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/clip_op.h"
+#include "paddle/fluid/operators/npu_op_runner.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename DeviceContext, typename T>
+class ClipNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+    out->mutable_data<T>(ctx.GetPlace());
+
+    auto min_tensor = ctx.HasInput("Min") ? ctx.Input<Tensor>("Min") : nullptr;
+    auto max_tensor = ctx.HasInput("Max") ? ctx.Input<Tensor>("Max") : nullptr;
+
+    Tensor min_tensor_temp(x->type());
+    Tensor max_tensor_temp(x->type());
+    if (min_tensor == nullptr) {
+      auto min_value = static_cast<T>(ctx.Attr<float>("min"));
+      min_tensor_temp.mutable_data<T>({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant<T>(&min_tensor_temp, min_value);
+      min_tensor = &min_tensor_temp;
+    }
+
+    if (max_tensor == nullptr) {
+      auto max_value = static_cast<T>(ctx.Attr<float>("max"));
+      max_tensor_temp.mutable_data<T>({1}, ctx.GetPlace());
+      FillNpuTensorWithConstant<T>(&max_tensor_temp, max_value);
+      max_tensor = &max_tensor_temp;
+    }
+
+    auto stream =
+        ctx.template device_context<paddle::platform::NPUDeviceContext>()
+            .stream();
+    const auto& runner =
+        NpuOpRunner("ClipByValue", {*x, *min_tensor, *max_tensor}, {*out}, {});
+    runner.Run(stream);
+  }
+};
+
+template <typename DeviceContext, typename T>
+class ClipGradNPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    dx->mutable_data<T>(ctx.GetPlace());
+
+    auto* min_tensor = ctx.HasInput("Min") ? ctx.Input<Tensor>("Min") : nullptr;
ctx.Input("Max") : nullptr; + + auto min_val = ctx.Attr("min"); + if (min_tensor) { + Tensor min_data; + framework::TensorCopy( + *min_tensor, platform::CPUPlace(), + ctx.template device_context(), &min_data); + ctx.template device_context().Wait(); + min_val = static_cast(min_data.data()[0]); + } + + auto max_val = ctx.Attr("max"); + if (max_tensor) { + Tensor max_data; + framework::TensorCopy( + *max_tensor, platform::CPUPlace(), + ctx.template device_context(), &max_data); + ctx.template device_context().Wait(); + max_val = static_cast(max_data.data()[0]); + } + + auto stream = + ctx.template device_context() + .stream(); + const auto& runner = + NpuOpRunner("HardtanhGrad", {*x, *dout}, {*dx}, + {{"min_val", min_val}, {"max_val", max_val}}); + runner.Run(stream); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_NPU_KERNEL( + clip, ops::ClipNPUKernel, + ops::ClipNPUKernel); + +REGISTER_OP_NPU_KERNEL( + clip_grad, ops::ClipGradNPUKernel, + ops::ClipGradNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py new file mode 100644 index 0000000000000000000000000000000000000000..65dcc73aa46d03ade89de9c108df351ec86892e2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_clip_op_npu.py @@ -0,0 +1,215 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+import sys
+sys.path.append("..")
+from op_test import OpTest
+
+
+class TestClipOp(OpTest):
+    def set_npu(self):
+        self.__class__.use_npu = True
+        self.place = paddle.NPUPlace(0)
+
+    def setUp(self):
+        self.set_npu()
+        self.max_relative_error = 0.006
+
+        self.inputs = {}
+        self.initTestCase()
+
+        self.op_type = "clip"
+        self.attrs = {}
+        self.attrs['min'] = self.min
+        self.attrs['max'] = self.max
+        if 'Min' in self.inputs:
+            min_v = self.inputs['Min']
+        else:
+            min_v = self.attrs['min']
+
+        if 'Max' in self.inputs:
+            max_v = self.inputs['Max']
+        else:
+            max_v = self.attrs['max']
+
+        input = np.random.random(self.shape).astype("float32")
+        input[np.abs(input - min_v) < self.max_relative_error] = 0.5
+        input[np.abs(input - max_v) < self.max_relative_error] = 0.5
+        self.inputs['X'] = input
+        self.outputs = {'Out': np.clip(self.inputs['X'], min_v, max_v)}
+
+    def test_check_output(self):
+        paddle.enable_static()
+        self.check_output_with_place(self.place)
+        paddle.disable_static()
+
+    def test_check_grad_normal(self):
+        paddle.enable_static()
+        self.check_grad_with_place(self.place, ['X'], 'Out')
+        paddle.disable_static()
+
+    def initTestCase(self):
+        self.shape = (4, 10, 10)
+        self.max = 0.8
+        self.min = 0.3
+        self.inputs['Max'] = np.array([0.8]).astype('float32')
+        self.inputs['Min'] = np.array([0.1]).astype('float32')
+
+
+class TestCase1(TestClipOp):
+    def initTestCase(self):
+        self.shape = (8, 16, 8)
+        self.max = 0.7
+        self.min = 0.0
+
+
+class TestCase2(TestClipOp):
+    def initTestCase(self):
+        self.shape = (8, 16)
+        self.max = 1.0
+        self.min = 0.0
+
+
+class TestCase3(TestClipOp):
+    def initTestCase(self):
+        self.shape = (4, 8, 16)
+        self.max = 0.7
+        self.min = 0.2
+
+
+class TestCase4(TestClipOp):
+    def initTestCase(self):
+        self.shape = (4, 8, 8)
+        self.max = 0.7
+        self.min = 0.2
+        self.inputs['Max'] = np.array([0.8]).astype('float32')
+        self.inputs['Min'] = np.array([0.3]).astype('float32')
+
+
+class TestCase5(TestClipOp):
+    def initTestCase(self):
+        self.shape = (4, 8, 16)
+        self.max = 0.5
+        self.min = 0.5
+
+
+class TestClipOpError(unittest.TestCase):
+    def test_errors(self):
+        paddle.enable_static()
+        with program_guard(Program(), Program()):
+            input_data = np.random.random((2, 4)).astype("float32")
+
+            def test_Variable():
+                fluid.layers.clip(x=input_data, min=-1.0, max=1.0)
+
+            self.assertRaises(TypeError, test_Variable)
+
+            def test_dtype():
+                x2 = fluid.layers.data(name='x2', shape=[1], dtype='int32')
+                fluid.layers.clip(x=x2, min=-1.0, max=1.0)
+
+            self.assertRaises(TypeError, test_dtype)
+        paddle.disable_static()
+
+
+class TestClipAPI(unittest.TestCase):
+    def _executed_api(self, x, min=None, max=None):
+        return paddle.clip(x, min, max)
+
+    def test_clip(self):
+        paddle.enable_static()
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('float32')
+        images = fluid.data(name='image', shape=data_shape, dtype='float32')
+        min = fluid.data(name='min', shape=[1], dtype='float32')
+        max = fluid.data(name='max', shape=[1], dtype='float32')
+
+        place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu(
+        ) else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+
+        out_1 = self._executed_api(images, min=min, max=max)
+        out_2 = self._executed_api(images, min=0.2, max=0.9)
+        out_3 = self._executed_api(images, min=0.3)
+        out_4 = self._executed_api(images, max=0.7)
+        out_5 = self._executed_api(images, min=min)
+        out_6 = self._executed_api(images, max=max)
+        out_7 = self._executed_api(images, max=-1.)
+        out_8 = self._executed_api(images)
+
+        res1, res2, res3, res4, res5, res6, res7, res8 = exe.run(
+            fluid.default_main_program(),
+            feed={
+                "image": data,
+                "min": np.array([0.2]).astype('float32'),
+                "max": np.array([0.8]).astype('float32')
+            },
+            fetch_list=[
+                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8
+            ])
+
+        self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8)))
+        self.assertTrue(np.allclose(res2, data.clip(0.2, 0.9)))
+        self.assertTrue(np.allclose(res3, data.clip(min=0.3)))
+        self.assertTrue(np.allclose(res4, data.clip(max=0.7)))
+        self.assertTrue(np.allclose(res5, data.clip(min=0.2)))
+        self.assertTrue(np.allclose(res6, data.clip(max=0.8)))
+        self.assertTrue(np.allclose(res7, data.clip(max=-1)))
+        self.assertTrue(np.allclose(res8, data))
+        paddle.disable_static()
+
+    def test_clip_dygraph(self):
+        paddle.disable_static()
+        place = fluid.NPUPlace(0) if fluid.core.is_compiled_with_npu(
+        ) else fluid.CPUPlace()
+        paddle.disable_static(place)
+        data_shape = [1, 9, 9, 4]
+        data = np.random.random(data_shape).astype('float32')
+        images = paddle.to_tensor(data, dtype='float32')
+        v_min = paddle.to_tensor(np.array([0.2], dtype=np.float32))
+        v_max = paddle.to_tensor(np.array([0.8], dtype=np.float32))
+
+        out_1 = self._executed_api(images, min=0.2, max=0.8)
+        images = paddle.to_tensor(data, dtype='float32')
+        out_2 = self._executed_api(images, min=0.2, max=0.9)
+        images = paddle.to_tensor(data, dtype='float32')
+        out_3 = self._executed_api(images, min=v_min, max=v_max)
+
+        self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
+        self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
+        self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
+
+    def test_errors(self):
+        paddle.enable_static()
+        x1 = fluid.data(name='x1', shape=[1], dtype="int16")
+        x2 = fluid.data(name='x2', shape=[1], dtype="int8")
+        self.assertRaises(TypeError, paddle.clip, x=x1, min=0.2, max=0.8)
+        self.assertRaises(TypeError, paddle.clip, x=x2, min=0.2, max=0.8)
+        paddle.disable_static()
+
+
+class TestInplaceClipAPI(TestClipAPI):
+    def _executed_api(self, x, min=None, max=None):
+        return x.clip_(min, max)
+
+
+if __name__ == '__main__':
+    unittest.main()
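
For reference, a minimal dygraph sketch (not part of the patch) of how the kernels added above are reached through the public paddle.clip API. It assumes a Paddle build with NPU support and falls back to CPU otherwise, mirroring the guard used in the tests:

import numpy as np
import paddle
import paddle.fluid as fluid

# Run on the NPU when available; tensors placed on an NPUPlace dispatch to
# the ClipNPUKernel / ClipGradNPUKernel registered in clip_op_npu.cc.
place = paddle.NPUPlace(0) if fluid.core.is_compiled_with_npu() \
    else paddle.CPUPlace()
paddle.disable_static(place)

x = paddle.to_tensor(np.random.random([4, 10, 10]).astype('float32'))

# Scalar bounds travel as the 'min'/'max' attributes of the clip op.
y_attr = paddle.clip(x, min=0.3, max=0.8)

# Tensor bounds travel as the optional 'Min'/'Max' inputs, which the kernel
# reads in preference to the attributes when they are fed.
y_in = paddle.clip(x,
                   min=paddle.to_tensor([0.3], dtype='float32'),
                   max=paddle.to_tensor([0.8], dtype='float32'))

assert np.allclose(y_attr.numpy(), y_in.numpy())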