未验证 提交 d37b3774 编写于 作者: J Jack Zhou 提交者: GitHub

register log double grad kernel for cpu and cuda

register log double grad kernel for cpu and cuda
上级 d014e29f
...@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> { ...@@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
} }
}; };
// Grad-op maker for `log_grad`: describes the `log_grad_grad` op that
// differentiates the first-order gradient of log.
//   first order : dx    = dout / x
//   second order: ddout = ddx / x
//                 dx    = -(dout / x) * (ddx / x)
template <typename T>
class LogDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
 public:
  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> op) const override {
    // Names of the first-order gradient variables, computed once and reused.
    const auto x_grad_name = framework::GradVarName("X");
    const auto out_grad_name = framework::GradVarName("Out");

    op->SetType("log_grad_grad");

    // Inputs: x, ddx (the gradient w.r.t. X@GRAD) and dout (Out@GRAD).
    op->SetInput("X", this->Input("X"));
    op->SetInput("DDX", this->OutputGrad(x_grad_name));
    op->SetInput("DOut", this->Input(out_grad_name));

    op->SetAttrMap(this->Attrs());

    // Outputs: dx (the gradient w.r.t. X) and ddout (the gradient w.r.t.
    // Out@GRAD).
    op->SetOutput("DX", this->InputGrad("X"));
    op->SetOutput("DDOut", this->InputGrad(out_grad_name));
  }
};
DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer, DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer,
{framework::GradVarName("Out"), {framework::GradVarName("Out"),
framework::GradVarName("X")}); framework::GradVarName("X")});
...@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL( ...@@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL(
ops::AbsGradGradFunctor<int64_t>>); ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */ /* ========================================================================== */
/* ========================== Log register ==================================*/
// Registers the `log` operator together with its first- and second-order
// gradient operators, and the CPU kernels that go with them.

// Forward op `log`. Its grad op maker is ActivationGradOpMaker, instantiated
// once for paddle::framework::OpDesc and once for paddle::imperative::OpBase.
REGISTER_OPERATOR(
log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::LogGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
ops::ActFwdInplaceInferer);
// First-order gradient op `log_grad`. Registering LogDoubleGradMaker here is
// what gives `log_grad` its own gradient op (`log_grad_grad`).
REGISTER_OPERATOR(log_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::LogDoubleGradMaker<paddle::framework::OpDesc>,
ops::LogDoubleGradMaker<paddle::imperative::OpBase>);
// Second-order gradient op `log_grad_grad`. The forward dependencies are read
// from the float instantiation of LogGradGradFunctor.
REGISTER_OPERATOR(
log_grad_grad,
ops::ActivationOpDoubleGrad<ops::LogGradGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);
// CPU kernels for `log` via the shared activation macro (defined elsewhere;
// presumably covers both the forward and the first-order gradient kernel).
REGISTER_ACTIVATION_CPU_KERNEL(log, Log, LogFunctor, LogGradFunctor);
// CPU kernels for `log_grad_grad`: float, double and float16.
REGISTER_OP_CPU_KERNEL(
log_grad_grad, ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<float>>,
ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<double>>,
ops::LogDoubleGradKernel<plat::CPUDeviceContext,
ops::LogGradGradFunctor<plat::float16>>);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/ /* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu) REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint( .AddCheckpoint(
......
...@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL(
ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext, ops::ActivationDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::AbsGradGradFunctor<int64_t>>); ops::AbsGradGradFunctor<int64_t>>);
/* ========================================================================== */ /* ========================================================================== */
/* ========================== Log register ==================================*/
// CUDA kernels for `log` via the shared activation macro (defined elsewhere;
// presumably covers both the forward and the first-order gradient kernel).
REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, LogGradFunctor);
// CUDA kernels for the second-order gradient op `log_grad_grad`:
// float, double and float16.
REGISTER_OP_CUDA_KERNEL(
log_grad_grad, ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<float>>,
ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<double>>,
ops::LogDoubleGradKernel<plat::CUDADeviceContext,
ops::LogGradGradFunctor<plat::float16>>);
/* ========================================================================== */
...@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel ...@@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel
} }
}; };
// Kernel for the `log_grad_grad` op. It adds no members of its own and
// inherits everything from SquareDoubleGradKernel; the log-specific math is
// supplied by the Functor template argument.
template <typename DeviceContext, typename Functor>
class LogDoubleGradKernel
: public SquareDoubleGradKernel<DeviceContext, Functor> {};
template <typename DeviceContext, typename Functor> template <typename DeviceContext, typename Functor>
class ELUDoubleGradKernel class ELUDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> { : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
...@@ -1852,6 +1856,37 @@ class PowGradKernel ...@@ -1852,6 +1856,37 @@ class PowGradKernel
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} }
}; };
// Second-order gradient functor of log.
//
// Given the first-order gradient dx = dout / x, this computes
//   ddout = ddx / x
//   dx    = -(dout / x) * (ddx / x) = -dout * ddx / (x * x)
//
// Arguments of operator():
//   dev   - device wrapper; its eigen_device() runs the Eigen expressions.
//   X     - forward input x (required).
//   ddX   - gradient flowing into the first-order dx (required).
//   ddOut - optional output; skipped when null.
//   dOut  - first-order upstream gradient; only read when dX is non-null.
//   dX    - optional output; skipped when null.
//
// A missing required tensor is reported through GET_DATA_SAFELY.
template <typename T>
struct LogGradGradFunctor : public BaseActivationFunctor<T> {
  template <typename Device>
  void operator()(const Device& dev, const framework::Tensor* X,
                  const framework::Tensor* ddX, framework::Tensor* ddOut,
                  const framework::Tensor* dOut, framework::Tensor* dX) const {
    auto* d = dev.eigen_device();
    auto ddx = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad"));
    auto x = framework::EigenVector<T>::Flatten(
        GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad"));
    // ddout = ddx / x; dx = -(dout / x) * (ddx / x)
    // dx must be computed first: ddout may be written in place over ddx, and
    // dx still needs the original ddx values.
    if (dX) {
      // DOut is wired as an *input* of log_grad_grad (see LogDoubleGradMaker),
      // so it is labelled "Input" in the diagnostic.
      auto dout = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dOut, "Input", "DOut", "LogGradGrad"));
      auto dx = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad"));
      dx.device(*d) = dout * static_cast<T>(-1) * ddx / (x * x);
    }
    if (ddOut) {
      auto ddout = framework::EigenVector<T>::Flatten(
          GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad"));
      ddout.device(*d) = ddx * static_cast<T>(1) / x;
    }
  }

  // The double-grad op only needs the forward input X, not the forward output.
  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; }
};
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -1872,7 +1907,6 @@ class PowGradKernel ...@@ -1872,7 +1907,6 @@ class PowGradKernel
__macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \ __macro(cosh, Cosh, CoshFunctor, CoshGradFunctor); \
__macro(round, Round, RoundFunctor, ZeroGradFunctor); \ __macro(round, Round, RoundFunctor, ZeroGradFunctor); \
__macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \
__macro(log, Log, LogFunctor, LogGradFunctor); \
__macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \ __macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \
__macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \ __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \
__macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \
......
...@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase): ...@@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
self.func(p) self.func(p)
class TestLogDoubleGradCheck(unittest.TestCase):
    """Double-gradient check for ``layers.log`` on every available place."""

    @prog_scope()
    def func(self, place):
        # Build y = log(x) on a small 4-D float64 input and run the
        # double-gradient checker on it at the given place.
        input_shape = [2, 3, 7, 9]
        perturbation = 1e-6
        float_type = np.float64

        x = layers.data('x', input_shape, False, float_type)
        x.persistable = True
        y = layers.log(x)

        # Samples lie in [0.1, 1), so log(x) is always evaluated at x > 0.
        x_values = np.random.uniform(0.1, 1, input_shape).astype(float_type)

        gradient_checker.double_grad_check(
            [x], y, x_init=x_values, place=place, eps=perturbation)

    def test_grad(self):
        # Always check on CPU; also check on GPU 0 when built with CUDA.
        places = [fluid.CPUPlace()]
        places += [fluid.CUDAPlace(0)] if core.is_compiled_with_cuda() else []
        for device in places:
            self.func(device)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册