Unverified commit 480b284c authored by niuliling123, committed by GitHub

Modified reduce_max, reduce_min, and reduce_prod to a higher-performance implementation. (#32974)

Parent 20eafd79
@@ -13,46 +13,98 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include <string>
-#include <vector>
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/amp/fp16_type_traits.h"
-#include "paddle/fluid/platform/device_context.h"
+#include <cmath>
+#include <limits>
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
 #include "paddle/fluid/platform/hostdevice.h"
-#include "paddle/fluid/platform/macros.h"
+#ifdef __HIPCC__
+#include <hip/hip_runtime.h>
+#endif
 
 namespace paddle {
 namespace operators {
 
-template <typename T>
+template <typename Tx, typename Ty = Tx>
 struct CustomMin {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  inline Ty initial() {
+    return static_cast<Ty>(std::numeric_limits<Ty>::max());
+  }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return (b < a) ? b : a;
   }
 };
 
-template <typename T>
+template <typename Tx, typename Ty = Tx>
 struct CustomMax {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  inline Ty initial() {
+    return static_cast<Ty>(std::numeric_limits<Ty>::lowest());
+  }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return (b > a) ? b : a;
   }
 };
 
-template <typename T>
+// for cub::Reduce
+template <typename Tx, typename Ty = Tx>
 struct CustomSum {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx, Ty>;
+
+  inline Ty initial() { return static_cast<Ty>(0.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return b + a;
   }
 };
 
-template <typename T>
+template <typename Tx, typename Ty = Tx>
+struct CustomMean {
+  using Transformer = detail::DivideFunctor<Tx>;
+
+  inline Ty initial() { return static_cast<Ty>(0.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b + a;
+  }
+};
+
+template <typename Tx, typename Ty = Tx>
 struct CustomMul {
-  __device__ __forceinline__ T operator()(const T &a, const T &b) const {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  inline Ty initial() { return static_cast<Ty>(1.0f); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
     return b * a;
   }
 };
+
+template <typename Tx, typename Ty = Tx>
+struct CustomLogicalOr {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  inline Ty initial() { return static_cast<Ty>(false); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b || a;
+  }
+};
+
+template <typename Tx, typename Ty = Tx>
+struct CustomLogicalAnd {
+  using Transformer = detail::IdentityFunctor<Tx>;
+
+  inline Ty initial() { return static_cast<Ty>(true); }
+
+  __device__ __forceinline__ Ty operator()(const Ty &a, const Ty &b) const {
+    return b && a;
+  }
+};
 
 }  // namespace operators
 }  // namespace paddle
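
Note on the new functor interface: compared with the old single-type functors, each struct above now also supplies an identity element through initial() and names a Transformer used to pre-process inputs, so one generic reduction kernel can be instantiated with any of them. The host-only C++ sketch below only illustrates that initial()/operator() contract; MaxSketch, MulSketch, and SerialReduce are simplified stand-ins invented for illustration and are not part of Paddle.

#include <cstdio>
#include <limits>
#include <vector>

// Simplified stand-ins for CustomMax / CustomMul keeping the same contract:
// initial() supplies the identity element, operator() combines two values.
template <typename T>
struct MaxSketch {
  T initial() const { return std::numeric_limits<T>::lowest(); }
  T operator()(const T &a, const T &b) const { return (b > a) ? b : a; }
};

template <typename T>
struct MulSketch {
  T initial() const { return static_cast<T>(1); }
  T operator()(const T &a, const T &b) const { return b * a; }
};

// Generic serial reduction: any functor exposing initial()/operator()
// can be plugged in, which is the property the CUDA kernel relies on.
template <typename T, typename Reducer>
T SerialReduce(const std::vector<T> &data, Reducer reducer) {
  T acc = reducer.initial();
  for (const T &v : data) acc = reducer(acc, v);
  return acc;
}

int main() {
  std::vector<float> x = {3.f, -1.f, 7.f, 2.f};
  std::printf("max  = %f\n", SerialReduce(x, MaxSketch<float>{}));  // 7.000000
  std::printf("prod = %f\n", SerialReduce(x, MulSketch<float>{}));  // -42.000000
  return 0;
}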
@@ -11,15 +11,13 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-REGISTER_OP_CUDA_KERNEL(reduce_max,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          double, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::MaxFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::MaxFunctor>);
+// reduce_max
+REGISTER_OP_CUDA_KERNEL(
+    reduce_max, ops::ReduceCudaKernel<float, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<double, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMax>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMax>);
@@ -11,15 +11,13 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
 
-REGISTER_OP_CUDA_KERNEL(reduce_min,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          double, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::MinFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::MinFunctor>);
+// reduce_min
+REGISTER_OP_CUDA_KERNEL(
+    reduce_min, ops::ReduceCudaKernel<float, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<double, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMin>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMin>);
@@ -12,26 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/fluid/operators/reduce_ops/reduce_functor_op.h"
+#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
 #include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h"
 
+// reduce_prod
 #ifdef __HIPCC__
 // Eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h:922
 // do not support double in HIPCC platform (Eigen3 to be fixed)
-REGISTER_OP_CUDA_KERNEL(reduce_prod,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::ProdFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::ProdFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::ProdFunctor>);
+REGISTER_OP_CUDA_KERNEL(
+    reduce_prod, ops::ReduceCudaKernel<float, paddle::operators::CustomMul>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMul>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMul>);
 #else
-REGISTER_OP_CUDA_KERNEL(reduce_prod,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          float, ops::ProdFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          double, ops::ProdFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int, ops::ProdFunctor>,
-                        ops::ReduceKernel<paddle::platform::CUDADeviceContext,
-                                          int64_t, ops::ProdFunctor>);
+REGISTER_OP_CUDA_KERNEL(
+    reduce_prod, ops::ReduceCudaKernel<float, paddle::operators::CustomMul>,
+    ops::ReduceCudaKernel<int, paddle::operators::CustomMul>,
+    ops::ReduceCudaKernel<double, paddle::operators::CustomMul>,
+    ops::ReduceCudaKernel<int64_t, paddle::operators::CustomMul>);
 #endif
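
For orientation only, the sketch below shows one way a functor of this shape could drive a block-level CUDA reduction on the device. MaxFloatSketch and BlockReduceSketch are hypothetical names invented for this example; the actual ReduceCudaKernel in reduce_op.cu.h handles arbitrary shapes, reduce dimensions, and data types and is substantially more elaborate.

#include <cfloat>
#include <cstdio>
#include <cuda_runtime.h>

// Float-only stand-in for CustomMax; -FLT_MAX plays the role of initial().
struct MaxFloatSketch {
  __device__ float initial() const { return -FLT_MAX; }
  __device__ float operator()(float a, float b) const { return (b > a) ? b : a; }
};

// Hypothetical single-block reduction: each thread folds a strided slice of
// the input, then the per-thread partials are combined in shared memory.
template <typename Reducer, int kBlock = 256>
__global__ void BlockReduceSketch(const float *in, float *out, int n, Reducer r) {
  __shared__ float partial[kBlock];
  float acc = r.initial();
  for (int i = threadIdx.x; i < n; i += kBlock) acc = r(acc, in[i]);
  partial[threadIdx.x] = acc;
  __syncthreads();
  for (int stride = kBlock / 2; stride > 0; stride /= 2) {
    if (threadIdx.x < stride)
      partial[threadIdx.x] = r(partial[threadIdx.x], partial[threadIdx.x + stride]);
    __syncthreads();
  }
  if (threadIdx.x == 0) *out = partial[0];
}

int main() {
  const int n = 1000;
  float host[n];
  for (int i = 0; i < n; ++i) host[i] = static_cast<float>(i % 97);
  float *d_in = nullptr, *d_out = nullptr, result = 0.f;
  cudaMalloc(&d_in, n * sizeof(float));
  cudaMalloc(&d_out, sizeof(float));
  cudaMemcpy(d_in, host, n * sizeof(float), cudaMemcpyHostToDevice);
  BlockReduceSketch<MaxFloatSketch><<<1, 256>>>(d_in, d_out, n, MaxFloatSketch{});
  cudaMemcpy(&result, d_out, sizeof(float), cudaMemcpyDeviceToHost);
  printf("max = %f\n", result);  // expect 96
  cudaFree(d_in);
  cudaFree(d_out);
  return 0;
}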