From d1e89ead550a5c847abb1614b60ce686a3a532c6 Mon Sep 17 00:00:00 2001 From: wuhuanzhou Date: Wed, 2 Jun 2021 10:48:29 +0800 Subject: [PATCH] optimize OP's compilation time implemented by Eigen, test=develop (#33218) --- paddle/fluid/operators/eigen/eigen_function.h | 20 +++++++++++ paddle/fluid/operators/eigen/loss.cc | 33 +++++++++++++++++++ paddle/fluid/operators/eigen/loss.cu | 33 +++++++++++++++++++ paddle/fluid/operators/log_loss_op.cc | 5 +++ paddle/fluid/operators/log_loss_op.cu | 21 ------------ paddle/fluid/operators/log_loss_op.h | 11 +++---- paddle/fluid/operators/top_k_function_cuda.h | 13 +++++--- 7 files changed, 105 insertions(+), 31 deletions(-) delete mode 100644 paddle/fluid/operators/log_loss_op.cu diff --git a/paddle/fluid/operators/eigen/eigen_function.h b/paddle/fluid/operators/eigen/eigen_function.h index 8cbc7cd6acd..9a3be7ca439 100644 --- a/paddle/fluid/operators/eigen/eigen_function.h +++ b/paddle/fluid/operators/eigen/eigen_function.h @@ -196,6 +196,26 @@ struct EigenRankLossGrad { const InType& left, const InType& right); }; +template +struct EigenLogLoss { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const EigenDevice& dev, OutType out, const InType& pred, + const InType& label, const T& epsilon); +}; + +template +struct EigenLogLossGrad { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const EigenDevice& dev, OutType dpred, const InType& dloss, + const InType& pred, const InType& label, const T& epsilon); +}; + template struct EigenHingeLoss { using InType = Eigen::TensorMap< diff --git a/paddle/fluid/operators/eigen/loss.cc b/paddle/fluid/operators/eigen/loss.cc index 22a3647bc31..469456537d9 100644 --- a/paddle/fluid/operators/eigen/loss.cc +++ b/paddle/fluid/operators/eigen/loss.cc @@ -53,6 +53,39 @@ struct EigenRankLossGrad { template struct EigenRankLoss; template struct EigenRankLossGrad; +template +struct EigenLogLoss { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const Eigen::DefaultDevice& dev, OutType out, + const InType& pred, const InType& label, const T& epsilon) { + out.device(dev) = (-(label * (pred + epsilon).log()) - + ((static_cast(1) - label) * + (static_cast(1) - pred + epsilon).log())); + } +}; + +template +struct EigenLogLossGrad { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const Eigen::DefaultDevice& dev, OutType dpred, + const InType& dloss, const InType& pred, const InType& label, + const T& epsilon) { + dpred.device(dev) = + dloss * + (-(label / (pred + epsilon)) + + ((static_cast(1) - label) / (static_cast(1) - pred + epsilon))); + } +}; + +template struct EigenLogLoss; +template struct EigenLogLossGrad; + template struct EigenHingeLoss { using InType = Eigen::TensorMap< diff --git a/paddle/fluid/operators/eigen/loss.cu b/paddle/fluid/operators/eigen/loss.cu index fac7e3370bc..02341202a2b 100644 --- a/paddle/fluid/operators/eigen/loss.cu +++ b/paddle/fluid/operators/eigen/loss.cu @@ -53,6 +53,39 @@ struct EigenRankLossGrad { template struct EigenRankLoss; template struct EigenRankLossGrad; +template +struct EigenLogLoss { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& pred, + const InType& label, const T& epsilon) { + out.device(dev) = (-(label * (pred + epsilon).log()) - + ((static_cast(1) - label) * + (static_cast(1) - pred + epsilon).log())); + } +}; + +template +struct EigenLogLossGrad { + using InType = Eigen::TensorMap< + Eigen::Tensor>; + using OutType = + Eigen::TensorMap>; + static void Eval(const Eigen::GpuDevice& dev, OutType dpred, + const InType& dloss, const InType& pred, const InType& label, + const T& epsilon) { + dpred.device(dev) = + dloss * + (-(label / (pred + epsilon)) + + ((static_cast(1) - label) / (static_cast(1) - pred + epsilon))); + } +}; + +template struct EigenLogLoss; +template struct EigenLogLossGrad; + template struct EigenHingeLoss { using InType = Eigen::TensorMap< diff --git a/paddle/fluid/operators/log_loss_op.cc b/paddle/fluid/operators/log_loss_op.cc index 1569512dc74..c41805d41ce 100644 --- a/paddle/fluid/operators/log_loss_op.cc +++ b/paddle/fluid/operators/log_loss_op.cc @@ -154,3 +154,8 @@ REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL( log_loss_grad, ops::LogLossGradKernel); +REGISTER_OP_CUDA_KERNEL( + log_loss, ops::LogLossKernel); +REGISTER_OP_CUDA_KERNEL( + log_loss_grad, + ops::LogLossGradKernel); diff --git a/paddle/fluid/operators/log_loss_op.cu b/paddle/fluid/operators/log_loss_op.cu deleted file mode 100644 index 280913c43a2..00000000000 --- a/paddle/fluid/operators/log_loss_op.cu +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ -#include "paddle/fluid/operators/log_loss_op.h" - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL( - log_loss, ops::LogLossKernel); -REGISTER_OP_CUDA_KERNEL( - log_loss_grad, - ops::LogLossGradKernel); diff --git a/paddle/fluid/operators/log_loss_op.h b/paddle/fluid/operators/log_loss_op.h index e62de17a986..e7985ab810b 100644 --- a/paddle/fluid/operators/log_loss_op.h +++ b/paddle/fluid/operators/log_loss_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/eigen/eigen_function.h" namespace paddle { namespace operators { @@ -40,9 +41,8 @@ class LogLossKernel : public framework::OpKernel { auto loss = EigenVector::Flatten(*loss_out); auto& place = *ctx.template device_context().eigen_device(); - loss.device(place) = (-(label * (prediction + epsilon).log()) - - ((static_cast(1) - label) * - (static_cast(1) - prediction + epsilon).log())); + EigenLogLoss, T>::Eval( + place, loss, prediction, label, epsilon); } }; @@ -64,9 +64,8 @@ class LogLossGradKernel : public framework::OpKernel { if (dpred) { dpred->mutable_data(ctx.GetPlace()); auto dx = framework::EigenVector::Flatten(*dpred); - dx.device(place) = dl * (-(label / (prediction + epsilon)) + - ((static_cast(1) - label) / - (static_cast(1) - prediction + epsilon))); + EigenLogLossGrad, T>::Eval( + place, dx, dl, prediction, label, epsilon); } } }; diff --git a/paddle/fluid/operators/top_k_function_cuda.h b/paddle/fluid/operators/top_k_function_cuda.h index a7d7ea260ec..07749f90eba 100644 --- a/paddle/fluid/operators/top_k_function_cuda.h +++ b/paddle/fluid/operators/top_k_function_cuda.h @@ -22,6 +22,7 @@ limitations under the License. */ #ifdef __HIPCC__ #include #endif +#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/top_k_op.h" #include "paddle/fluid/platform/cuda_device_function.h" #include "paddle/fluid/platform/float16.h" @@ -563,15 +564,19 @@ bool SortTopk(const platform::CUDADeviceContext& ctx, const Eigen::DSizes slice_sizes{num_rows, k}; auto e_indices = framework::EigenMatrix::From(*indices_tensor, dim); - auto e_tmp_indices = framework::EigenMatrix::From(temp_indices); + auto e_tmp_indices = framework::EigenMatrix::From( + static_cast(temp_indices)); std::vector odims = {static_cast(num_rows), static_cast(k)}; auto dim = framework::make_ddim(odims); auto e_values = framework::EigenMatrix::From(*out_tensor, dim); - auto e_tmp_values = framework::EigenMatrix::From(temp_values); + auto e_tmp_values = + framework::EigenMatrix::From(static_cast(temp_values)); - e_indices.device(dev) = e_tmp_indices.slice(slice_indices, slice_sizes); - e_values.device(dev) = e_tmp_values.slice(slice_indices, slice_sizes); + EigenSlice, int64_t, 2>::Eval( + dev, e_indices, e_tmp_indices, slice_indices, slice_sizes); + EigenSlice, T, 2>::Eval( + dev, e_values, e_tmp_values, slice_indices, slice_sizes); } return true; } -- GitLab