Unverified commit 5634d2ca, authored by Zhong Hui, committed by GitHub

[cherry pick] double grad support for abs

 double grad support for abs 
Parent b19a5979
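For reference, the rule this patch implements, restated from the comments in the diff below: for $y = |x|$,

$$ dx = \operatorname{sign}(x)\,dy \ \ (\texttt{abs\_grad}), \qquad ddy = \operatorname{sign}(x)\,ddx \ \ (\texttt{abs\_grad\_grad}), $$

where $ddx$ denotes the incoming gradient of $dx$ and $ddy$ the resulting gradient of $dy$.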
@@ -711,10 +711,28 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
}
};
// AbsGrad: dx=dy if x >=0 else -dy
// AbsDoubleGrad: ddy = ddx if x >=0 else -ddx
template <typename T>
class AbsDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("abs_grad_grad");
// input1: x
op->SetInput("X", this->Input("X"));
// input2: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetAttrMap(this->Attrs());
// output: ddy
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};
// ReluGrad: dx = dy if y >= 0 else 0
// ReluGradGrad: ddy = ddx if y >= 0 else 0
//
template <typename T>
class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
@@ -1162,7 +1180,13 @@ REGISTER_OPERATOR(
std::conditional<ops::CanInplaceAct<ops::AbsGradFunctor<float>>(),
ops::ActFwdInplaceInferer, void>::type);
REGISTER_OPERATOR(abs_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInference,
ops::AbsDoubleGradMaker<paddle::framework::OpDesc>,
ops::AbsDoubleGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(
abs_grad_grad,
ops::ActivationOpDoubleGrad<ops::AbsGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInference);
REGISTER_OP_CPU_KERNEL(abs,
ops::ActivationKernel<paddle::platform::CPUDeviceContext,
@@ -1182,4 +1206,16 @@ REGISTER_OP_CPU_KERNEL(
ops::AbsGradFunctor<int>>,
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::AbsGradFunctor<int64_t>>);
REGISTER_OP_CPU_KERNEL(
abs_grad_grad,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::AbsGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::AbsGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::AbsGradGradFunctor<plat::float16>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::AbsGradGradFunctor<int>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */
@@ -160,7 +160,7 @@ REGISTER_OP_CUDA_KERNEL(
ops::ExpGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== abs register ============================ */
REGISTER_OP_CUDA_KERNEL(
abs, ops::ActivationKernel<plat::CUDADeviceContext, ops::AbsFunctor<float>>,
@@ -180,4 +180,16 @@ REGISTER_OP_CUDA_KERNEL(
ops::AbsGradFunctor<int64_t>>,
ops::ActivationGradKernel<plat::CUDADeviceContext,
ops::AbsGradFunctor<plat::float16>>);
REGISTER_OP_CUDA_KERNEL(
abs_grad_grad,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CUDADeviceContext,
ops::AbsGradGradFunctor<plat::float16>>,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<int>>,
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */
@@ -1318,6 +1318,27 @@ class ActivationDoubleGradKernel
}
};
template <typename T>
struct AbsGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* X,
const framework::Tensor* Out, const framework::Tensor* ddX,
framework::Tensor* ddOut, framework::Tensor* dOut,
framework::Tensor* dX) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "AbsGradGrad"));
auto x = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "AbsGradGrad"));
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "AbsGradGrad"));
ddout.device(*d) = ddx * x.sign();
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
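Below is a minimal NumPy sketch (illustrative only, not part of the patch; the helper names abs_grad and abs_grad_grad are hypothetical) of the rule AbsGradGradFunctor evaluates, DDOut = DDX * sign(X). Because the first-order backward is linear in dy, a finite-difference directional derivative of it in the direction ddx should reproduce the double-grad output:

import numpy as np

def abs_grad(x, dy):
    # AbsGrad: dx = dy if x >= 0 else -dy
    return dy * np.sign(x)

def abs_grad_grad(x, ddx):
    # AbsDoubleGrad: ddy = ddx if x >= 0 else -ddx
    return ddx * np.sign(x)

rng = np.random.RandomState(0)
x = rng.uniform(-1.0, 1.0, (2, 3, 7, 9))
dy = rng.uniform(-1.0, 1.0, (2, 3, 7, 9))
ddx = rng.uniform(-1.0, 1.0, (2, 3, 7, 9))

eps = 1e-6
# JVP of abs_grad w.r.t. dy in direction ddx, by finite differences.
# (x == 0 is a measure-zero corner case and is not sampled here.)
jvp = (abs_grad(x, dy + eps * ddx) - abs_grad(x, dy)) / eps
assert np.allclose(jvp, abs_grad_grad(x, ddx), atol=1e-6)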
template <typename T>
struct ReluGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
...
@@ -147,5 +147,29 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
self.func(p)
class TestAbsDoubleGradCheck(unittest.TestCase):
    @prog_scope()
    def func(self, place):
        # the shape of the input variable should be clearly specified, not include -1.
        shape = [2, 3, 7, 9]
        eps = 1e-6
        dtype = np.float64

        x = layers.data('x', shape, False, dtype)
        x.persistable = True
        y = layers.abs(x)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        gradient_checker.double_grad_check(
            [x], y, x_init=x_arr, place=place, eps=eps)

    def test_grad(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for p in places:
            self.func(p)
if __name__ == "__main__":
    unittest.main()