未验证 提交 798e2e7e 编写于 作者: 光明和真理's avatar 光明和真理 提交者: GitHub

[MLU] add mlu kernel for clip (#43229)

上级 9ad05afd
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
namespace paddle {
namespace operators {
// Forward kernel of the `clip` operator for the MLU backend.
//
// The clipping bounds default to the float attributes "min" and "max"
// (converted to T). When the optional tensor inputs "Min" / "Max" are present,
// element 0 of the respective tensor replaces the attribute value. The actual
// computation is delegated to MLUCnnl::Clip.
template <typename T>
class ClipMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    // Attribute values are the fallback; tensor inputs take precedence.
    auto min = static_cast<T>(ctx.Attr<float>("min"));
    auto max = static_cast<T>(ctx.Attr<float>("max"));
    if (ctx.HasInput("Min")) {
      min = ReadFirstElement(*ctx.Input<Tensor>("Min"));
    }
    if (ctx.HasInput("Max")) {
      max = ReadFirstElement(*ctx.Input<Tensor>("Max"));
    }

    out->mutable_data<T>(ctx.GetPlace());

    // MLUCnnl::Clip takes a single tensor descriptor (for X), so no separate
    // descriptor is built for the output.
    MLUCnnlTensorDesc x_desc(*x);
    // The bounds are handed over as pointers to host-side scalars.
    MLUCnnl::Clip(ctx, x_desc.get(), GetBasePtr(x),
                  static_cast<const void*>(&min),
                  static_cast<const void*>(&max), GetBasePtr(out));
  }

 private:
  // Returns element 0 of `bound`. A tensor placed on an MLU is first copied
  // synchronously to the CPU so that it can be dereferenced on the host;
  // otherwise its data pointer is read directly.
  static T ReadFirstElement(const Tensor& bound) {
    const T* data = bound.data<T>();
    if (platform::is_mlu_place(bound.place())) {
      Tensor host_copy;
      paddle::framework::TensorCopySync(bound, platform::CPUPlace(),
                                        &host_copy);
      data = host_copy.data<T>();
      // Read while host_copy is still alive.
      return data[0];
    }
    return data[0];
  }
};
// Backward kernel of the `clip` operator for the MLU backend.
//
// Computes the gradient w.r.t. X from X and the gradient of Out by delegating
// to MLUCnnl::HardtanhBackward. The float bounds come from the "min" / "max"
// attributes unless the optional "Min" / "Max" tensor inputs are present, in
// which case element 0 of the tensor is used instead.
template <typename T>
class ClipGradMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    dx->mutable_data<T>(ctx.GetPlace());

    const Tensor* min_tensor = nullptr;
    if (ctx.HasInput("Min")) {
      min_tensor = ctx.Input<Tensor>("Min");
    }
    const Tensor* max_tensor = nullptr;
    if (ctx.HasInput("Max")) {
      max_tensor = ctx.Input<Tensor>("Max");
    }

    // Copies `src` to the CPU, waits for the MLU device context so the copy
    // has finished, and returns element 0 converted to float.
    const auto read_scalar = [&ctx](const Tensor& src) -> float {
      Tensor host_copy;
      framework::TensorCopy(
          src, platform::CPUPlace(),
          ctx.template device_context<platform::DeviceContext>(), &host_copy);
      ctx.template device_context<paddle::platform::MLUDeviceContext>().Wait();
      return static_cast<float>(host_copy.data<T>()[0]);
    };

    float min_val = ctx.Attr<float>("min");
    if (min_tensor != nullptr) {
      min_val = read_scalar(*min_tensor);
    }
    float max_val = ctx.Attr<float>("max");
    if (max_tensor != nullptr) {
      max_val = read_scalar(*max_tensor);
    }

    MLUCnnlTensorDesc x_desc(*x);
    MLUCnnlTensorDesc dx_desc(*dx);
    MLUCnnlTensorDesc dout_desc(*dout);
    // Note the argument order expected by the wrapper: max first, then min.
    MLUCnnl::HardtanhBackward(ctx, x_desc.get(), GetBasePtr(x),
                              dout_desc.get(), GetBasePtr(dout), max_val,
                              min_val, dx_desc.get(), GetBasePtr(dx));
  }
};
} // namespace operators
} // namespace paddle
// Short aliases used by the kernel registrations below.
namespace ops = paddle::operators;
namespace plat = paddle::platform;
// Register the forward `clip` kernel, instantiated for float and float16.
REGISTER_OP_MLU_KERNEL(clip, ops::ClipMLUKernel<float>,
ops::ClipMLUKernel<plat::float16>);
// Register the backward `clip_grad` kernel for the same two element types.
REGISTER_OP_MLU_KERNEL(clip_grad, ops::ClipGradMLUKernel<float>,
ops::ClipGradMLUKernel<plat::float16>);
...@@ -1942,6 +1942,25 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() { ...@@ -1942,6 +1942,25 @@ MLUCnnlTrigonDesc::~MLUCnnlTrigonDesc() {
cast_type, output_desc, output)); cast_type, output_desc, output));
} }
/*static*/ void MLUCnnl::Clip(const ExecutionContext& ctx,
const cnnlTensorDescriptor_t x_desc,
const void* x, const void* min, const void* max,
void* y) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
PADDLE_ENFORCE_MLU_SUCCESS(cnnlClip(handle, x_desc, x, min, max, y));
}
/*static*/ void MLUCnnl::HardtanhBackward(
const ExecutionContext& ctx, const cnnlTensorDescriptor_t x_desc,
const void* x, const cnnlTensorDescriptor_t diff_y_desc, const void* diff_y,
const float max_val, const float min_val,
const cnnlTensorDescriptor_t diff_x_desc, void* diff_x) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
PADDLE_ENFORCE_MLU_SUCCESS(
cnnlHardtanhBackward(handle, x_desc, x, diff_y_desc, diff_y, max_val,
min_val, diff_x_desc, diff_x));
}
/* static */ void MLUCnnl::PoolingBackward( /* static */ void MLUCnnl::PoolingBackward(
const ExecutionContext& ctx, const cnnlPoolingDescriptor_t pooling_desc, const ExecutionContext& ctx, const cnnlPoolingDescriptor_t pooling_desc,
const void* alpha, const cnnlTensorDescriptor_t y_desc, const void* y, const void* alpha, const cnnlTensorDescriptor_t y_desc, const void* y,
......
...@@ -439,6 +439,16 @@ class MLUCnnl { ...@@ -439,6 +439,16 @@ class MLUCnnl {
const cnnlTensorDescriptor_t input_desc, const void* input, const cnnlTensorDescriptor_t input_desc, const void* input,
const cnnlTensorDescriptor_t output_desc, void* output); const cnnlTensorDescriptor_t output_desc, void* output);
// Clips `input` (described by `input_desc`) and writes the result to `y`.
// `min` and `max` are untyped pointers to the bounds.
// NOTE(review): the bounds are presumably host-side scalars of the tensor's
// element type — confirm against the CNNL documentation.
static void Clip(const ExecutionContext& ctx,
const cnnlTensorDescriptor_t input_desc, const void* input,
const void* min, const void* max, void* y);
// Backward pass wrapper taking the forward input `x`, the incoming gradient
// `diff_y` and writing the resulting gradient to `diff_x`, each with its own
// tensor descriptor. Note the parameter order: `max_val` comes before
// `min_val`.
static void HardtanhBackward(
const ExecutionContext& ctx, const cnnlTensorDescriptor_t x_desc,
const void* x, const cnnlTensorDescriptor_t diff_y_desc,
const void* diff_y, const float max_val, const float min_val,
const cnnlTensorDescriptor_t diff_x_desc, void* diff_x);
static void Div(const ExecutionContext& ctx, static void Div(const ExecutionContext& ctx,
cnnlComputationPreference_t prefer, cnnlComputationPreference_t prefer,
const cnnlTensorDescriptor_t in0_desc, const void* in0, const cnnlTensorDescriptor_t in0_desc, const void* in0,
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import sys
# Extend the module search path with the parent directory; presumably this is
# where the `op_test` module imported below lives — verify against the
# repository layout.
sys.path.append("..")
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
from op_test import OpTest
from paddle.fluid.framework import _test_eager_guard
# Enable Paddle's static mode before any test case is defined or run.
paddle.enable_static()
class TestClipOp(OpTest):
    """Base test case for the `clip` operator on MLU place 0.

    Subclasses override `initTestCase` to set `dtype`, `shape`, the `min` /
    `max` attributes and, optionally, the 'Min' / 'Max' tensor inputs. When a
    tensor input is present it is used instead of the attribute to compute
    the expected output.
    """

    def setUp(self):
        self.place = paddle.device.MLUPlace(0)
        self.__class__.use_mlu = True
        self.max_relative_error = 0.006
        self.python_api = paddle.clip

        # initTestCase may add 'Min' / 'Max' entries, so the dict must exist
        # before it runs.
        self.inputs = {}
        self.initTestCase()

        self.op_type = "clip"
        self.attrs = {'min': self.min, 'max': self.max}

        # Tensor bounds take precedence over the attribute bounds.
        min_v = self.inputs.get('Min', self.attrs['min'])
        max_v = self.inputs.get('Max', self.attrs['max'])

        data = np.random.random(self.shape).astype(self.dtype)
        # Move samples lying within the tolerance of a bound to 0.5.
        tolerance = self.max_relative_error
        data[np.abs(data - min_v) < tolerance] = 0.5
        data[np.abs(data - max_v) < tolerance] = 0.5

        self.inputs['X'] = data
        self.outputs = {'Out': np.clip(data, min_v, max_v)}

    def test_check_output(self):
        """Check the forward result on the MLU place."""
        self.check_output_with_place(self.place)

    def test_check_grad_normal(self):
        """Check the gradient of 'Out' with respect to 'X' on the MLU place."""
        self.check_grad_with_place(self.place, ['X'], 'Out')

    def initTestCase(self):
        """float32, shape (4, 10, 10), attributes [0.3, 0.8], tensors [0.1, 0.8].

        The 'Min' tensor (0.1) intentionally differs from the `min` attribute
        (0.3); the expected output is computed from the tensor value.
        """
        self.dtype = np.float32
        self.shape = (4, 10, 10)
        self.min, self.max = 0.3, 0.8
        self.inputs.update(
            Max=np.array([0.8], dtype=self.dtype),
            Min=np.array([0.1], dtype=self.dtype))
class TestCase1(TestClipOp):
    """float32 input of shape (8, 16, 8) clipped to the attributes [0.0, 0.7]."""

    def initTestCase(self):
        self.dtype, self.shape = np.float32, (8, 16, 8)
        self.min, self.max = 0.0, 0.7
class TestCase2(TestClipOp):
    """float32 input of shape (8, 16) clipped to the attributes [0.0, 1.0]."""

    def initTestCase(self):
        self.dtype, self.shape = np.float32, (8, 16)
        self.min, self.max = 0.0, 1.0
class TestCase3(TestClipOp):
    """float32 input of shape (4, 8, 16) clipped to the attributes [0.2, 0.7]."""

    def initTestCase(self):
        self.dtype, self.shape = np.float32, (4, 8, 16)
        self.min, self.max = 0.2, 0.7
class TestCase4(TestClipOp):
    """float32, shape (4, 8, 8); tensor bounds [0.3, 0.8] alongside attributes [0.2, 0.7]."""

    def initTestCase(self):
        self.dtype, self.shape = np.float32, (4, 8, 8)
        self.min, self.max = 0.2, 0.7
        # Tensor bounds differ from the attribute bounds set above.
        self.inputs.update(
            Max=np.array([0.8], dtype=self.dtype),
            Min=np.array([0.3], dtype=self.dtype))
class TestCase5(TestClipOp):
    """float32 input of shape (4, 8, 16) with equal bounds: min == max == 0.5."""

    def initTestCase(self):
        self.dtype, self.shape = np.float32, (4, 8, 16)
        self.min, self.max = 0.5, 0.5
class TestCase6(TestClipOp):
    """float16, shape (4, 8, 8); tensor bounds [0.3, 0.8] alongside attributes [0.2, 0.7]."""

    def initTestCase(self):
        self.dtype, self.shape = np.float16, (4, 8, 8)
        self.min, self.max = 0.2, 0.7
        # Tensor bounds differ from the attribute bounds set above.
        self.inputs.update(
            Max=np.array([0.8], dtype=self.dtype),
            Min=np.array([0.3], dtype=self.dtype))
# Run all test cases of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册