diff --git a/paddle/fluid/operators/clip_op_mlu.cc b/paddle/fluid/operators/clip_op_mlu.cc
new file mode 100644
index 0000000000000000000000000000000000000000..88e8fe778dadcdeeb20bb18dd7c82a206764d538
--- /dev/null
+++ b/paddle/fluid/operators/clip_op_mlu.cc
@@ -0,0 +1,115 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/mlu/mlu_baseop.h"
+
+namespace paddle {
+namespace operators {
+
+// Forward kernel of clip on MLU: out = min(max(x, min_v), max_v).
+// "min"/"max" may come either from attributes or from the optional
+// "Min"/"Max" tensors (tensors take precedence, matching other backends).
+template <typename T>
+class ClipMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* out = ctx.Output<Tensor>("Out");
+
+    auto min = static_cast<T>(ctx.Attr<float>("min"));
+    auto max = static_cast<T>(ctx.Attr<float>("max"));
+
+    if (ctx.HasInput("Min")) {
+      Tensor min_cpu;
+      auto* min_tensor = ctx.Input<Tensor>("Min");
+      auto* min_data = min_tensor->data<T>();
+      // The scalar bound is read on the host, so device-resident tensors
+      // must first be copied back to CPU.
+      if (platform::is_mlu_place(min_tensor->place())) {
+        paddle::framework::TensorCopySync(*min_tensor, platform::CPUPlace(),
+                                          &min_cpu);
+        min_data = min_cpu.data<T>();
+      }
+      min = min_data[0];
+    }
+
+    if (ctx.HasInput("Max")) {
+      Tensor max_cpu;
+      auto* max_tensor = ctx.Input<Tensor>("Max");
+      auto* max_data = max_tensor->data<T>();
+      if (platform::is_mlu_place(max_tensor->place())) {
+        paddle::framework::TensorCopySync(*max_tensor, platform::CPUPlace(),
+                                          &max_cpu);
+        max_data = max_cpu.data<T>();
+      }
+      max = max_data[0];
+    }
+
+    out->mutable_data<T>(ctx.GetPlace());
+
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc out_desc(*out);
+    MLUCnnl::Clip(ctx, x_desc.get(), GetBasePtr(x),
+                  static_cast<const void*>(&min),
+                  static_cast<const void*>(&max), GetBasePtr(out));
+  }
+};
+
+// Backward kernel of clip: dx = dout where min < x < max, else 0.
+// Implemented via cnnlHardtanhBackward, which has exactly this gradient.
+template <typename T>
+class ClipGradMLUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* x = ctx.Input<Tensor>("X");
+    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
+    dx->mutable_data<T>(ctx.GetPlace());
+
+    auto* min_tensor = ctx.HasInput("Min") ? ctx.Input<Tensor>("Min") : nullptr;
+    auto* max_tensor = ctx.HasInput("Max") ? ctx.Input<Tensor>("Max") : nullptr;
+
+    auto min_val = ctx.Attr<float>("min");
+    if (min_tensor) {
+      Tensor min_data;
+      framework::TensorCopy(
+          *min_tensor, platform::CPUPlace(),
+          ctx.template device_context<platform::MLUDeviceContext>(), &min_data);
+      // Async copy: wait before dereferencing the host buffer.
+      ctx.template device_context<platform::MLUDeviceContext>().Wait();
+      min_val = static_cast<float>(min_data.data<T>()[0]);
+    }
+    auto max_val = ctx.Attr<float>("max");
+    if (max_tensor) {
+      Tensor max_data;
+      framework::TensorCopy(
+          *max_tensor, platform::CPUPlace(),
+          ctx.template device_context<platform::MLUDeviceContext>(), &max_data);
+      ctx.template device_context<platform::MLUDeviceContext>().Wait();
+      max_val = static_cast<float>(max_data.data<T>()[0]);
+    }
+
+    MLUCnnlTensorDesc x_desc(*x);
+    MLUCnnlTensorDesc dx_desc(*dx);
+    MLUCnnlTensorDesc dout_desc(*dout);
+
+    MLUCnnl::HardtanhBackward(ctx, x_desc.get(), GetBasePtr(x), dout_desc.get(),
+                              GetBasePtr(dout), max_val, min_val, dx_desc.get(),
+                              GetBasePtr(dx));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_MLU_KERNEL(clip, ops::ClipMLUKernel<float>,
+                       ops::ClipMLUKernel<plat::float16>);
+
+REGISTER_OP_MLU_KERNEL(clip_grad, ops::ClipGradMLUKernel<float>,
+                       ops::ClipGradMLUKernel<plat::float16>);
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc
index 5b452ca3ba2eabe562d7d80362491c68dcb71d4c..dc8301b9e0b8dc24ebb2f7be6a3ee976e5092bf1 100644
--- a/paddle/fluid/operators/mlu/mlu_baseop.cc
+++ b/paddle/fluid/operators/mlu/mlu_baseop.cc
@@ -1942,6 +1942,25 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() {
                                  cast_type, output_desc, output));
 }
 
+/*static*/ void MLUCnnl::Clip(const ExecutionContext& ctx,
+                              const cnnlTensorDescriptor_t x_desc,
+                              const void* x, const void* min, const void* max,
+                              void* y) {
+  cnnlHandle_t handle = GetHandleFromCTX(ctx);
+  PADDLE_ENFORCE_MLU_SUCCESS(cnnlClip(handle, x_desc, x, min, max, y));
+}
+
+/*static*/ void MLUCnnl::HardtanhBackward(
+    const ExecutionContext& ctx, const cnnlTensorDescriptor_t x_desc,
+    const void* x, const cnnlTensorDescriptor_t diff_y_desc, const void* diff_y,
+    const float max_val, const float min_val,
+    const cnnlTensorDescriptor_t diff_x_desc, void* diff_x) {
+  cnnlHandle_t handle = GetHandleFromCTX(ctx);
+  PADDLE_ENFORCE_MLU_SUCCESS(
+      cnnlHardtanhBackward(handle, x_desc, x, diff_y_desc, diff_y, max_val,
+                           min_val, diff_x_desc, diff_x));
+}
+
 /* static */ void MLUCnnl::PoolingBackward(
     const ExecutionContext& ctx, const cnnlPoolingDescriptor_t pooling_desc,
     const void* alpha, const cnnlTensorDescriptor_t y_desc, const void* y,
diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h
index 1763fc56cebf982e9dd2d22203d50edbfc882121..774e297c06dd0b78217d53455294ccc43cba2b98 100644
--- a/paddle/fluid/operators/mlu/mlu_baseop.h
+++ b/paddle/fluid/operators/mlu/mlu_baseop.h
@@ -439,6 +439,16 @@ class MLUCnnl {
                    const cnnlTensorDescriptor_t input_desc, const void* input,
                    const cnnlTensorDescriptor_t output_desc, void* output);
 
+  static void Clip(const ExecutionContext& ctx,
+                   const cnnlTensorDescriptor_t input_desc, const void* input,
+                   const void* min, const void* max, void* y);
+
+  static void HardtanhBackward(
+      const ExecutionContext& ctx, const cnnlTensorDescriptor_t x_desc,
+      const void* x, const cnnlTensorDescriptor_t diff_y_desc,
+      const void* diff_y, const float max_val, const float min_val,
+      const cnnlTensorDescriptor_t diff_x_desc, void* diff_x);
+
   static void Div(const ExecutionContext& ctx,
                   cnnlComputationPreference_t prefer,
                   const cnnlTensorDescriptor_t in0_desc, const void* in0,
diff --git a/python/paddle/fluid/tests/unittests/mlu/test_clip_op_mlu.py b/python/paddle/fluid/tests/unittests/mlu/test_clip_op_mlu.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a8f617be6de25206873c0e259313807d7e2b266
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/mlu/test_clip_op_mlu.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import sys
+
+sys.path.append("..")
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+from op_test import OpTest
+from paddle.fluid.framework import _test_eager_guard
+
+paddle.enable_static()
+
+
+class TestClipOp(OpTest):
+
+    def setUp(self):
+        self.place = paddle.device.MLUPlace(0)
+        self.__class__.use_mlu = True
+        self.max_relative_error = 0.006
+        self.python_api = paddle.clip
+
+        self.inputs = {}
+        self.initTestCase()
+
+        self.op_type = "clip"
+        self.attrs = {}
+        self.attrs['min'] = self.min
+        self.attrs['max'] = self.max
+        if 'Min' in self.inputs:
+            min_v = self.inputs['Min']
+        else:
+            min_v = self.attrs['min']
+
+        if 'Max' in self.inputs:
+            max_v = self.inputs['Max']
+        else:
+            max_v = self.attrs['max']
+
+        # Keep inputs away from the clip boundaries so the (non-smooth)
+        # gradient check is numerically stable.
+        input = np.random.random(self.shape).astype(self.dtype)
+        input[np.abs(input - min_v) < self.max_relative_error] = 0.5
+        input[np.abs(input - max_v) < self.max_relative_error] = 0.5
+        self.inputs['X'] = input
+        self.outputs = {'Out': np.clip(self.inputs['X'], min_v, max_v)}
+
+    def test_check_output(self):
+        self.check_output_with_place(self.place)
+
+    def test_check_grad_normal(self):
+        self.check_grad_with_place(self.place, ['X'], 'Out')
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (4, 10, 10)
+        self.max = 0.8
+        self.min = 0.3
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.1]).astype(self.dtype)
+
+
+class TestCase1(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (8, 16, 8)
+        self.max = 0.7
+        self.min = 0.0
+
+
+class TestCase2(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (8, 16)
+        self.max = 1.0
+        self.min = 0.0
+
+
+class TestCase3(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (4, 8, 16)
+        self.max = 0.7
+        self.min = 0.2
+
+
+class TestCase4(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (4, 8, 8)
+        self.max = 0.7
+        self.min = 0.2
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
+
+
+class TestCase5(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float32
+        self.shape = (4, 8, 16)
+        self.max = 0.5
+        self.min = 0.5
+
+
+class TestCase6(TestClipOp):
+
+    def initTestCase(self):
+        self.dtype = np.float16
+        self.shape = (4, 8, 8)
+        self.max = 0.7
+        self.min = 0.2
+        self.inputs['Max'] = np.array([0.8]).astype(self.dtype)
+        self.inputs['Min'] = np.array([0.3]).astype(self.dtype)
+
+
+if __name__ == '__main__':
+    unittest.main()