Unverified commit 8895379a, authored by: Chen Weihang, committed by: GitHub

[Phi] Support cudnn kernel moving & move softmax kernels (#39547)

* support cudnn kernel moving

* polish cmake rules

* add unittest for coverage

* remove orig kernel

* remove softmax cudnn kernel

* fix softmax test failed

* fix npu func error

* resolve conflict

* rename gpu dnn kernels

* fix name rule error

* fix compile error

* update fp16 namespace
Parent fed6de40
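In short, the commit retires fluid's cuDNN softmax registration in favor of a phi kernel registered under the new GPUDNN backend. Condensed from the hunks below (CUDA build shown; the HIP branches drop double because MIOPEN does not support it):

// before (fluid, removed in this commit):
REGISTER_OP_KERNEL(softmax, CUDNN, plat::CUDAPlace,
                   ops::SoftmaxCUDNNKernel<float>,
                   ops::SoftmaxCUDNNKernel<double>,
                   ops::SoftmaxCUDNNKernel<plat::float16>);

// after (phi, added in this commit):
PD_REGISTER_KERNEL(softmax, GPUDNN, ALL_LAYOUT,
                   phi::SoftmaxRawGPUDNNKernel,
                   float, double, phi::dtype::float16) {}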
......@@ -81,6 +81,8 @@ function(kernel_declare TARGET_LIST)
file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, GPU, ALL_LAYOUT);\n")
elseif (${kernel_path} MATCHES "./xpu\/")
file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, XPU, ALL_LAYOUT);\n")
elseif (${kernel_path} MATCHES "./gpudnn\/")
file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, GPUDNN, ALL_LAYOUT);\n")
else ()
# deal with device-independent kernels; for now we use CPU temporarily
file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n")
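With the new gpudnn branch above, a kernel source placed under a gpudnn/ directory gets its declaration auto-generated against the GPUDNN backend; for the softmax kernel moved in this commit, the appended line is:

PD_DECLARE_KERNEL(softmax, GPUDNN, ALL_LAYOUT);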
......@@ -94,6 +96,7 @@ function(kernel_library TARGET)
set(cpu_srcs)
set(gpu_srcs)
set(xpu_srcs)
set(gpudnn_srcs)
set(selected_rows_srcs)
# parse and save the deps kernel targets
set(all_srcs)
......@@ -101,6 +104,8 @@ function(kernel_library TARGET)
set(oneValueArgs SUB_DIR)
set(multiValueArgs SRCS DEPS)
set(target_build_flag 1)
cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
......@@ -123,6 +128,9 @@ function(kernel_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
endif()
endif()
if (WITH_XPU)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
......@@ -141,6 +149,7 @@ function(kernel_library TARGET)
list(APPEND all_srcs ${cpu_srcs})
list(APPEND all_srcs ${gpu_srcs})
list(APPEND all_srcs ${xpu_srcs})
list(APPEND all_srcs ${gpudnn_srcs})
foreach(src ${all_srcs})
file(READ ${src} target_content)
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
......@@ -166,21 +175,22 @@ function(kernel_library TARGET)
list(LENGTH cpu_srcs cpu_srcs_len)
list(LENGTH gpu_srcs gpu_srcs_len)
list(LENGTH xpu_srcs xpu_srcs_len)
list(LENGTH gpudnn_srcs gpudnn_srcs_len)
list(LENGTH selected_rows_srcs selected_rows_srcs_len)
# Build Target according different src organization
if((${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
${xpu_srcs_len} GREATER 0) AND (${common_srcs_len} GREATER 0 OR
${selected_rows_srcs_len} GREATER 0))
${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0) AND
(${common_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0))
# If the common_srcs/selected_rows_srcs depend on specific device srcs, build the target using this rule.
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
endif()
else()
......@@ -190,14 +200,14 @@ function(kernel_library TARGET)
endif()
endif()
# If there are only specific device srcs, build target using this rule.
elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_ROCM)
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
else()
if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
......@@ -234,35 +244,40 @@ function(kernel_library TARGET)
cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
else()
message(FATAL_ERROR "Cannot find any implementation for ${TARGET}")
set(target_build_flag 0)
endif()
if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR
${selected_rows_srcs_len} GREATER 0)
# append target into PHI_KERNELS property
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(phi_kernels ${phi_kernels} ${TARGET})
set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels})
endif()
if (${target_build_flag} EQUAL 1)
if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR
${gpudnn_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0)
# append target into PHI_KERNELS property
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(phi_kernels ${phi_kernels} ${TARGET})
set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels})
endif()
# parse kernel name and auto generate kernel declaration
# here, we don't need to check WITH_XXX, because if not WITH_XXX, the
# xxx_srcs_len will be equal to 0
if (${common_srcs_len} GREATER 0)
kernel_declare(${common_srcs})
endif()
if (${cpu_srcs_len} GREATER 0)
kernel_declare(${cpu_srcs})
endif()
if (${gpu_srcs_len} GREATER 0)
kernel_declare(${gpu_srcs})
endif()
if (${xpu_srcs_len} GREATER 0)
kernel_declare(${xpu_srcs})
endif()
if (${selected_rows_srcs_len} GREATER 0)
kernel_declare(${selected_rows_srcs})
# parse kernel name and auto generate kernel declaration
# here, we don't need to check WITH_XXX, because if not WITH_XXX, the
# xxx_srcs_len will be equal to 0
if (${common_srcs_len} GREATER 0)
kernel_declare(${common_srcs})
endif()
if (${cpu_srcs_len} GREATER 0)
kernel_declare(${cpu_srcs})
endif()
if (${gpu_srcs_len} GREATER 0)
kernel_declare(${gpu_srcs})
endif()
if (${xpu_srcs_len} GREATER 0)
kernel_declare(${xpu_srcs})
endif()
if (${gpudnn_srcs_len} GREATER 0)
kernel_declare(${gpudnn_srcs})
endif()
if (${selected_rows_srcs_len} GREATER 0)
kernel_declare(${selected_rows_srcs})
endif()
endif()
endfunction()
......
......@@ -20,7 +20,7 @@
#include "paddle/fluid/framework/ir/pass_tester_helper.h"
#include "paddle/fluid/framework/op_registry.h"
USE_OP(softmax);
USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
......
......@@ -67,7 +67,7 @@ OpKernelType TransPtenKernelKeyToOpKernelType(
LibraryType library_type = LibraryType::kPlain;
if (kernel_key.backend() == phi::Backend::MKLDNN) {
library_type = LibraryType::kMKLDNN;
} else if (kernel_key.backend() == phi::Backend::CUDNN) {
} else if (kernel_key.backend() == phi::Backend::GPUDNN) {
library_type = LibraryType::kCUDNN;
} else {
// do nothing
......@@ -82,7 +82,7 @@ phi::KernelKey TransOpKernelTypeToPtenKernelKey(
if (kernel_type.library_type_ == LibraryType::kMKLDNN) {
backend = phi::Backend::MKLDNN;
} else if (kernel_type.library_type_ == LibraryType::kCUDNN) {
backend = phi::Backend::CUDNN;
backend = phi::Backend::GPUDNN;
} else {
// do nothing
}
......
......@@ -42,7 +42,7 @@ TEST(PtenUtils, TransPtenKernelKeyToOpKernelType) {
#endif
#ifdef PADDLE_WITH_CUDA
phi::KernelKey kernel_key_cudnn(phi::Backend::CUDNN, phi::DataLayout::NCHW,
phi::KernelKey kernel_key_cudnn(phi::Backend::GPUDNN, phi::DataLayout::NCHW,
phi::DataType::FLOAT32);
op_kernel_type =
paddle::framework::TransPtenKernelKeyToOpKernelType(kernel_key_cudnn);
......@@ -53,3 +53,38 @@ TEST(PtenUtils, TransPtenKernelKeyToOpKernelType) {
paddle::framework::LibraryType::kCUDNN);
#endif
}
TEST(PtenUtils, TransOpKernelTypeToPtenKernelKey) {
paddle::framework::OpKernelType op_kernel_type(
paddle::framework::proto::VarType::FP32, paddle::platform::CPUPlace(),
paddle::framework::DataLayout::kNCHW);
auto kernel_key =
paddle::framework::TransOpKernelTypeToPtenKernelKey(op_kernel_type);
ASSERT_EQ(kernel_key.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(kernel_key.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(kernel_key.backend(), phi::Backend::CPU);
#ifdef PADDLE_WITH_MKLDNN
paddle::framework::OpKernelType op_kernel_type_mkldnn(
paddle::framework::proto::VarType::FP32, paddle::platform::CPUPlace(),
paddle::framework::DataLayout::kMKLDNN,
paddle::framework::LibraryType::kMKLDNN);
auto kernel_key_mkldnn = paddle::framework::TransOpKernelTypeToPtenKernelKey(
op_kernel_type_mkldnn);
ASSERT_EQ(kernel_key_mkldnn.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(kernel_key_mkldnn.layout(), phi::DataLayout::MKLDNN);
ASSERT_EQ(kernel_key_mkldnn.backend(), phi::Backend::MKLDNN);
#endif
#ifdef PADDLE_WITH_CUDA
paddle::framework::OpKernelType op_kernel_type_cudnn(
paddle::framework::proto::VarType::FP32, paddle::platform::CPUPlace(),
paddle::framework::DataLayout::kNCHW,
paddle::framework::LibraryType::kCUDNN);
auto kernel_key_cudnn =
paddle::framework::TransOpKernelTypeToPtenKernelKey(op_kernel_type_cudnn);
ASSERT_EQ(kernel_key_cudnn.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(kernel_key_cudnn.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(kernel_key_cudnn.backend(), phi::Backend::GPUDNN);
#endif
}
......@@ -88,5 +88,5 @@ class SoftMaxOpConverter : public OpConverter {
} // namespace inference
} // namespace paddle
USE_OP(softmax);
USE_OP_ITSELF(softmax);
REGISTER_TRT_OP_CONVERTER(softmax, SoftMaxOpConverter);
......@@ -45,4 +45,4 @@ TEST(SoftMaxOpConverter, main) {
} // namespace inference
} // namespace paddle
USE_OP(softmax);
USE_OP_ITSELF(softmax);
......@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/gpu/nccl_helper.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
......@@ -98,8 +99,8 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
const auto& labels_dims = labels->dims();
const int axis = logits_dims.size() - 1;
const int N = SizeToAxis(axis, logits_dims);
const int D = SizeFromAxis(axis, logits_dims);
const int N = phi::funcs::SizeToAxis(axis, logits_dims);
const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
Tensor logits_2d, softmax_2d, loss_2d;
logits_2d.ShareDataWith(*logits).Resize({N, D});
......@@ -220,8 +221,8 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
}
const auto softmax_dims = softmax->dims();
const int axis = softmax_dims.size() - 1;
const int N = SizeToAxis(axis, softmax_dims);
const int D = SizeFromAxis(axis, softmax_dims);
const int N = phi::funcs::SizeToAxis(axis, softmax_dims);
const int D = phi::funcs::SizeFromAxis(axis, softmax_dims);
Tensor logit_grad_2d;
logit_grad_2d.ShareDataWith(*logit_grad).Resize({N, D});
......
......@@ -23,7 +23,6 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/softmax_op.h"
namespace paddle {
namespace operators {
......
......@@ -14,8 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/dropout_impl.cu.h"
#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
#include "paddle/fluid/operators/softmax_cudnn_op.cu.h"
#include "paddle/fluid/operators/transpose_op.cu.h"
#include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
namespace paddle {
namespace operators {
......@@ -123,11 +123,11 @@ class FMHARef {
T, T>(
dev_ctx_, ins, &outs, elewise_add_axis, AddFunctor<T>());
SoftmaxForwardCUDAKernelDriver<T>(dev_ctx_, *src_mask_out_tensor,
softmax_axis, softmax_out_tensor);
phi::SoftmaxForwardCUDAKernelDriver<T>(dev_ctx_, *src_mask_out_tensor,
softmax_axis, softmax_out_tensor);
} else {
SoftmaxForwardCUDAKernelDriver<T>(dev_ctx_, *qk_out_tensor, softmax_axis,
softmax_out_tensor);
phi::SoftmaxForwardCUDAKernelDriver<T>(dev_ctx_, *qk_out_tensor,
softmax_axis, softmax_out_tensor);
}
transB = CblasNoTrans;
......@@ -251,9 +251,9 @@ class FMHARef {
}
if (src_mask_tensor != nullptr) {
SoftmaxBackwardCUDAKernelDriver<T>(dev_ctx_, softmax_out_tensor,
*softmax_out_grad_tensor, softmax_axis,
src_mask_out_grad_tensor);
phi::SoftmaxBackwardCUDAKernelDriver<T>(
dev_ctx_, softmax_out_tensor, *softmax_out_grad_tensor, softmax_axis,
src_mask_out_grad_tensor);
// recall LaunchElementwiseCudaKernel fw: src_mask_out = qk_out +
// src_mask
......@@ -272,9 +272,9 @@ class FMHARef {
}
} else {
SoftmaxBackwardCUDAKernelDriver<T>(dev_ctx_, softmax_out_tensor,
*softmax_out_grad_tensor, softmax_axis,
qk_out_grad_tensor);
phi::SoftmaxBackwardCUDAKernelDriver<T>(dev_ctx_, softmax_out_tensor,
*softmax_out_grad_tensor,
softmax_axis, qk_out_grad_tensor);
}
T* qk_out_grad_data = qk_out_grad_tensor->data<T>();
......
......@@ -26,6 +26,7 @@ namespace cub = hipcub;
#include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
......@@ -246,8 +247,8 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel<T> {
const auto& labels_dims = labels->dims();
const int axis = logits_dims.size() - 1;
const int N = SizeToAxis(axis, logits_dims);
const int D = SizeFromAxis(axis, logits_dims);
const int N = phi::funcs::SizeToAxis(axis, logits_dims);
const int D = phi::funcs::SizeFromAxis(axis, logits_dims);
int blocks = NumBlocks(N);
int threads = kNumCUDAThreads;
......@@ -401,8 +402,8 @@ class MarginCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
const auto softmax_dims = softmax->dims();
const int axis = softmax_dims.size() - 1;
const int N = SizeToAxis(axis, softmax_dims);
const int D = SizeFromAxis(axis, softmax_dims);
const int N = phi::funcs::SizeToAxis(axis, softmax_dims);
const int D = phi::funcs::SizeFromAxis(axis, softmax_dims);
if (return_softmax) {
framework::TensorCopy(*softmax, context.GetPlace(),
......
......@@ -22,7 +22,6 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/softmax_op.h"
namespace paddle {
namespace operators {
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
namespace paddle {
namespace operators {
......@@ -26,6 +27,13 @@ template class SoftmaxFunctor<platform::CPUDeviceContext, double, false>;
template class SoftmaxGradFunctor<platform::CPUDeviceContext, float>;
template class SoftmaxGradFunctor<platform::CPUDeviceContext, double>;
template class SoftmaxFunctor<phi::CPUContext, float, true>;
template class SoftmaxFunctor<phi::CPUContext, float, false>;
template class SoftmaxFunctor<phi::CPUContext, double, true>;
template class SoftmaxFunctor<phi::CPUContext, double, false>;
template class SoftmaxGradFunctor<phi::CPUContext, float>;
template class SoftmaxGradFunctor<phi::CPUContext, double>;
} // namespace math
} // namespace operators
} // namespace paddle
......@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/math/softmax_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
......@@ -139,6 +140,16 @@ template class SoftmaxGradFunctor<platform::CUDADeviceContext, double>;
template class SoftmaxGradFunctor<platform::CUDADeviceContext,
platform::float16>;
template class SoftmaxFunctor<phi::GPUContext, platform::float16, false>;
template class SoftmaxFunctor<phi::GPUContext, platform::float16, true>;
template class SoftmaxFunctor<phi::GPUContext, float, false>;
template class SoftmaxFunctor<phi::GPUContext, double, false>;
template class SoftmaxFunctor<phi::GPUContext, float, true>;
template class SoftmaxFunctor<phi::GPUContext, double, true>;
template class SoftmaxGradFunctor<phi::GPUContext, float>;
template class SoftmaxGradFunctor<phi::GPUContext, double>;
template class SoftmaxGradFunctor<phi::GPUContext, platform::float16>;
} // namespace math
} // namespace operators
} // namespace paddle
......@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
......@@ -70,7 +71,8 @@ class SoftmaxMKLDNNHandler
out_grad->dims(), in_x_grad->dims()));
auto dims = out_grad->dims(); // input and output share the same shape
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
const int axis =
phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
auto softmax_tz = phi::vectorize<int64_t>(dims);
auto data_softmax_md = MKLDNNMemDesc(
......@@ -96,7 +98,8 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {
Tensor* output = ctx.Output<Tensor>("Out");
bool is_inplaced = input->IsSharedBufferWith(*output);
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());
const int axis =
phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());
SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input,
output, axis);
......
......@@ -31,7 +31,7 @@ USE_OP(elementwise_mul);
USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN);
USE_OP(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP(softmax);
USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
USE_OP(conv2d);
USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);
......
......@@ -29,7 +29,7 @@ USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP(softmax);
USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
namespace paddle {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/softmax_cudnn_op.cu.h"
namespace paddle {
namespace operators {
template <typename T, bool LogMode = false>
class SoftmaxCUDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* out = ctx.Output<Tensor>("Out");
out->mutable_data<T>(ctx.GetPlace());
int input_axis = ctx.Attr<int>("axis");
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
SoftmaxForwardCUDAKernelDriver<T>(dev_ctx, *x, input_axis, out);
}
};
template <typename T, bool LogMode = false>
class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* out = ctx.Input<Tensor>("Out");
auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
dx->mutable_data<T>(ctx.GetPlace());
int input_axis = ctx.Attr<int>("axis");
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
SoftmaxBackwardCUDAKernelDriver<T>(dev_ctx, *out, *dout, input_axis, dx);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#ifdef PADDLE_WITH_HIP
// MIOPEN does not support double
REGISTER_OP_KERNEL(softmax, CUDNN, plat::CUDAPlace,
ops::SoftmaxCUDNNKernel<float>,
ops::SoftmaxCUDNNKernel<plat::float16>);
REGISTER_OP_KERNEL(softmax_grad, CUDNN, plat::CUDAPlace,
ops::SoftmaxGradCUDNNKernel<float>,
ops::SoftmaxGradCUDNNKernel<plat::float16>);
#else
REGISTER_OP_KERNEL(softmax, CUDNN, plat::CUDAPlace,
ops::SoftmaxCUDNNKernel<float>,
ops::SoftmaxCUDNNKernel<double>,
ops::SoftmaxCUDNNKernel<plat::float16>);
REGISTER_OP_KERNEL(softmax_grad, CUDNN, plat::CUDAPlace,
ops::SoftmaxGradCUDNNKernel<float>,
ops::SoftmaxGradCUDNNKernel<double>,
ops::SoftmaxGradCUDNNKernel<plat::float16>);
#endif
......@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#ifdef PADDLE_WITH_MKLDNN
......@@ -251,10 +250,3 @@ REGISTER_OPERATOR(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker,
ops::SoftmaxOpGradMaker<paddle::imperative::OpBase>,
ops::SoftmaxInplaceInferer);
REGISTER_OPERATOR(softmax_grad, ops::SoftmaxOpGrad);
REGISTER_OP_CPU_KERNEL(
softmax, ops::SoftmaxKernel<paddle::platform::CPUDeviceContext, float>,
ops::SoftmaxKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
softmax_grad,
ops::SoftmaxGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SoftmaxGradKernel<paddle::platform::CPUDeviceContext, double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/softmax.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
static inline int CanonicalAxis(const int axis, const int rank) {
if (axis < 0) {
return axis + rank;
}
return axis;
}
static inline int SizeToAxis(const int axis, DDim dims) {
int size = 1;
for (int i = 0; i < axis; i++) {
size *= dims[i];
}
return size;
}
static inline int SizeFromAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
static inline int SizeOutAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis + 1; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
template <typename DeviceContext, typename T>
class SoftmaxKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* X = context.Input<Tensor>("X");
auto* Out = context.Output<Tensor>("Out");
const int rank = X->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = X->dims()[axis];
// allocate memory on device.
Out->mutable_data<T>(context.GetPlace());
if (Out->numel() == 0) {
return;
}
const int n = SizeToAxis(axis, X->dims());
const int d = SizeFromAxis(axis, X->dims());
Tensor X_2d, Out_2d;
X_2d.ShareDataWith(*X).Resize({n, d});
Out_2d.ShareDataWith(*Out).Resize({n, d});
math::SoftmaxFunctor<DeviceContext, T, false>()(
context.template device_context<DeviceContext>(), axis_dim, &X_2d,
&Out_2d);
}
};
template <typename DeviceContext, typename T>
class SoftmaxGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* Out = context.Input<Tensor>("Out");
auto* dOut = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dX = context.Output<Tensor>(framework::GradVarName("X"));
const int rank = dX->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = dX->dims()[axis];
// allocate memory on device.
dX->mutable_data<T>(context.GetPlace());
if (dX->numel() == 0) {
return;
}
const int n = SizeToAxis(axis, dX->dims());
const int d = SizeFromAxis(axis, dX->dims());
Tensor dX_2d, Out_2d, dOut_2d;
dX_2d.ShareDataWith(*dX).Resize({n, d});
Out_2d.ShareDataWith(*Out).Resize({n, d});
dOut_2d.ShareDataWith(*dOut).Resize({n, d});
math::SoftmaxGradFunctor<DeviceContext, T>()(
context.template device_context<DeviceContext>(), axis_dim, &Out_2d,
&dOut_2d, &dX_2d);
}
};
} // namespace operators
} // namespace paddle
......@@ -12,8 +12,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
......@@ -51,7 +52,7 @@ class SoftmaxGradNPUKernel : public framework::OpKernel<T> {
auto dims = dX->dims();
const int rank = dims.size();
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), rank);
int64_t first_dim = 1;
int64_t sec_dim = 1;
for (int i = 0; i < axis; i++) {
......
......@@ -29,7 +29,7 @@ limitations under the License. */
namespace f = paddle::framework;
namespace p = paddle::platform;
USE_OP(softmax);
USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, NPU);
template <typename T>
......
......@@ -11,8 +11,8 @@ limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
......@@ -29,7 +29,7 @@ class SoftmaxXPUKernel : public framework::OpKernel<T> {
auto* x = context.Input<Tensor>("X");
auto* out = context.Output<Tensor>("Out");
const int rank = x->dims().size();
int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
// allocate memory on device.
out->mutable_data<T>(context.GetPlace());
......@@ -88,7 +88,7 @@ class SoftmaxGradXPUKernel : public framework::OpKernel<T> {
auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
const int rank = dx->dims().size();
int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
// allocate memory on device.
dx->mutable_data<T>(context.GetPlace());
......
......@@ -153,7 +153,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel {
"Attr(axis) value should be in range [-R, R-1], "
"R is the rank of Input(Logits)."));
axis = CanonicalAxis(axis, logits_rank);
axis = phi::funcs::CanonicalAxis(axis, logits_rank);
for (int i = 0; i < logits_rank; i++) {
if (i != axis) {
if (ctx->IsRuntime() || (logits_dims[i] > 0 && labels_dims[i] > 0)) {
......@@ -250,7 +250,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel {
"Attr(axis) value should be in range [-R, R-1], "
"R is the rank of Input(Logits)."));
axis = CanonicalAxis(axis, softmax_rank);
axis = phi::funcs::CanonicalAxis(axis, softmax_rank);
for (int i = 0; i < softmax_rank; i++) {
if (i != axis) {
if (ctx->IsRuntime() || (softmax_dims[i] > 0 && labels_dims[i] > 0)) {
......
......@@ -17,12 +17,12 @@ namespace cub = hipcub;
#endif
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/softmax_cudnn_op.cu.h"
#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
namespace paddle {
namespace operators {
......@@ -236,7 +236,7 @@ __global__ void WarpSoftmaxForward(T* loss, T* softmax, const T* src,
max_value[i] = (max_value[i] > valmax) ? max_value[i] : valmax;
}
}
WarpReduceMax<AccT, kBatchSize, kWarpSize>(max_value);
phi::WarpReduceMax<AccT, kBatchSize, kWarpSize>(max_value);
// compute sum: s_{i} = sum_{j}{ exp(src_{i,j} - maxvalue_{i} }
AccT sum[kBatchSize];
......@@ -276,7 +276,7 @@ __global__ void WarpSoftmaxForward(T* loss, T* softmax, const T* src,
}
}
}
WarpReduceSum<AccT, kBatchSize, kWarpSize>(sum);
phi::WarpReduceSum<AccT, kBatchSize, kWarpSize>(sum);
// write data
#pragma unroll
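The warp reductions used throughout these kernels now come from phi (declared in paddle/phi/kernels/gpudnn/softmax_gpudnn.h). A minimal sketch of WarpReduceSum, assuming the phi helper follows the standard butterfly warp-shuffle pattern (illustrative, not the verbatim implementation):

template <typename T, int kBatchSize, int kWarpSize>
__device__ __forceinline__ void WarpReduceSum(T* sum) {
#pragma unroll
  for (int offset = kWarpSize / 2; offset > 0; offset /= 2) {
#pragma unroll
    for (int i = 0; i < kBatchSize; ++i) {
      // exchange partial sums with the lane `offset` positions away
      T other = __shfl_xor_sync(0xFFFFFFFF, sum[i], offset, kWarpSize);
      sum[i] = sum[i] + other;
    }
  }
}

WarpReduceMax is the same pattern, keeping the larger of the two exchanged values.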
......@@ -566,7 +566,7 @@ __global__ void CrossEntropySoftLabel(T* loss, T* softmaxwrt, const T* softmax,
}
}
}
WarpReduceSum<T, kBatchSize, kWarpSize>(sum);
phi::WarpReduceSum<T, kBatchSize, kWarpSize>(sum);
__syncthreads();
__shared__ T sumshare[kWarpPerBatch][kBatchPerBlock][kBatchSize];
......@@ -674,7 +674,7 @@ __global__ void WarpSoftmaxForwardSoftLabel(T* loss, T* softmax, const T* src,
: static_cast<AccT>(valmax);
}
}
WarpReduceMax<AccT, kBatchSize, kWarpSize>(max_value);
phi::WarpReduceMax<AccT, kBatchSize, kWarpSize>(max_value);
// compute sum
AccT sum[kBatchSize]{0.0};
......@@ -694,7 +694,7 @@ __global__ void WarpSoftmaxForwardSoftLabel(T* loss, T* softmax, const T* src,
}
}
}
WarpReduceSum<AccT, kBatchSize, kWarpSize>(sum);
phi::WarpReduceSum<AccT, kBatchSize, kWarpSize>(sum);
// log_softmax and loss
AccT sumloss[kBatchSize]{0.0};
......@@ -737,7 +737,7 @@ __global__ void WarpSoftmaxForwardSoftLabel(T* loss, T* softmax, const T* src,
}
// loss
WarpReduceSum<AccT, kBatchSize, kWarpSize>(sumloss);
phi::WarpReduceSum<AccT, kBatchSize, kWarpSize>(sumloss);
for (int i = 0; i < kBatchSize; i++) {
if (i >= local_batches) break;
......@@ -950,11 +950,12 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
Tensor* loss = context.Output<Tensor>("Loss");
const int rank = softmax->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis =
phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis_dim = softmax->dims()[axis];
const int n = SizeToAxis(axis, softmax->dims());
const int d = SizeFromAxis(axis, softmax->dims());
const int n = phi::funcs::SizeToAxis(axis, softmax->dims());
const int d = phi::funcs::SizeFromAxis(axis, softmax->dims());
auto* softmax_out_data =
softmax_out->template mutable_data<T>(context.GetPlace());
......@@ -1035,11 +1036,11 @@ class SoftmaxWithCrossEntropyCUDAKernel : public framework::OpKernel<T> {
Tensor* loss = context.Output<Tensor>("Loss");
const int rank = logits->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = logits->dims()[axis];
const int64_t n = SizeToAxis(axis, logits->dims());
const int64_t d = SizeFromAxis(axis, logits->dims());
const int64_t n = phi::funcs::SizeToAxis(axis, logits->dims());
const int64_t d = phi::funcs::SizeFromAxis(axis, logits->dims());
auto* softmax_data = softmax->template mutable_data<T>(context.GetPlace());
auto* loss_data = loss->template mutable_data<T>(context.GetPlace());
......@@ -1118,11 +1119,11 @@ class SoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel<T> {
T* logit_grad_data = logit_grad->template data<T>();
const int rank = logit_grad->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = logit_grad->dims()[axis];
const int64_t n = SizeToAxis(axis, logit_grad->dims());
const int64_t d = SizeFromAxis(axis, logit_grad->dims());
const int64_t n = phi::funcs::SizeToAxis(axis, logit_grad->dims());
const int64_t d = phi::funcs::SizeFromAxis(axis, logit_grad->dims());
const int64_t remain = d / axis_dim;
#ifdef __HIPCC__
......
......@@ -18,7 +18,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
......@@ -84,7 +84,8 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
Tensor* softmax_out = context.Output<Tensor>("Softmax");
Tensor* loss = context.Output<Tensor>("Loss");
const int rank = softmax->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis =
phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = softmax->dims()[axis];
PADDLE_ENFORCE_GT(
......@@ -97,7 +98,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
softmax_out->mutable_data<T>(context.GetPlace());
loss->mutable_data<T>(context.GetPlace());
const int n = SizeToAxis(axis, softmax->dims());
const int n = phi::funcs::SizeToAxis(axis, softmax->dims());
PADDLE_ENFORCE_GT(
n, 0, platform::errors::InvalidArgument(
......@@ -105,7 +106,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
"SizeToAxis of softmax is %d.",
n));
const int d = SizeFromAxis(axis, softmax->dims());
const int d = phi::funcs::SizeFromAxis(axis, softmax->dims());
Tensor softmax_2d, labels_2d, loss_2d, softmax_out_2d;
softmax_2d.ShareDataWith(*softmax).Resize({n, d});
......@@ -133,7 +134,7 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
Tensor* loss = context.Output<Tensor>("Loss");
const int rank = logits->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = logits->dims()[axis];
PADDLE_ENFORCE_GT(
axis_dim, 0,
......@@ -145,14 +146,14 @@ class SoftmaxWithCrossEntropyKernel : public framework::OpKernel<T> {
softmax->mutable_data<T>(context.GetPlace());
loss->mutable_data<T>(context.GetPlace());
const int n = SizeToAxis(axis, logits->dims());
const int n = phi::funcs::SizeToAxis(axis, logits->dims());
PADDLE_ENFORCE_GT(
n, 0, platform::errors::InvalidArgument(
"The size of axis should be larger than 0, but received "
"SizeToAxis of logits is %d.",
n));
const int d = SizeFromAxis(axis, logits->dims());
const int d = phi::funcs::SizeFromAxis(axis, logits->dims());
Tensor logits_2d, softmax_2d, labels_2d, loss_2d;
logits_2d.ShareDataWith(*logits).Resize({n, d});
softmax_2d.ShareDataWith(*softmax).Resize({n, d});
......@@ -192,7 +193,7 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
auto ignore_index = context.Attr<int>("ignore_index");
const int rank = logit_grad->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
int axis_dim = logit_grad->dims()[axis];
PADDLE_ENFORCE_GT(
axis_dim, 0,
......@@ -201,14 +202,14 @@ class SoftmaxWithCrossEntropyGradKernel : public framework::OpKernel<T> {
"axis dimention is %d.",
axis_dim));
const int n = SizeToAxis(axis, logit_grad->dims());
const int n = phi::funcs::SizeToAxis(axis, logit_grad->dims());
PADDLE_ENFORCE_GT(
n, 0, platform::errors::InvalidArgument(
"The size of axis should be larger than 0, but received "
"SizeToAxis of logit_grad is %d.",
n));
const int d = SizeFromAxis(axis, logit_grad->dims());
const int d = phi::funcs::SizeFromAxis(axis, logit_grad->dims());
Tensor logit_grad_2d, labels_2d, out_grad_2d;
logit_grad_2d.ShareDataWith(*logit_grad).Resize({n, d});
labels_2d.ShareDataWith(labels).Resize({n, labels.numel() / n});
......
......@@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
namespace paddle {
......@@ -40,15 +41,16 @@ class SoftmaxWithCrossEntropyNPUKernel : public framework::OpKernel<T> {
"the npu kernel of softmax_with_cross_entropy."));
const int rank = logits->dims().size();
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
const int n = SizeToAxis(axis, logits->dims());
const int d = SizeFromAxis(axis, logits->dims());
const int axis = phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), rank);
const int n = phi::funcs::SizeToAxis(axis, logits->dims());
const int d = phi::funcs::SizeFromAxis(axis, logits->dims());
PADDLE_ENFORCE_EQ(
labels->numel(), n,
platform::errors::Unimplemented(
"The size of labels should be equal to SizeToAxis of logits,"
"but got size of labels is %d and SizeToAxis is %d.",
"The size of labels should be equal to phi::funcs::SizeToAxis of "
"logits,"
"but got size of labels is %d and phi::funcs::SizeToAxis is %d.",
labels->numel(), n));
loss->mutable_data<T>(ctx.GetPlace());
......@@ -97,9 +99,9 @@ class SoftmaxWithCrossEntropyGradNPUKernel : public framework::OpKernel<T> {
logits_grad->mutable_data<T>(ctx.GetPlace());
const int rank = logits_grad->dims().size();
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), rank);
const int n = SizeToAxis(axis, logits_grad->dims());
const int d = SizeFromAxis(axis, logits_grad->dims());
const int axis = phi::funcs::CanonicalAxis(ctx.Attr<int>("axis"), rank);
const int n = phi::funcs::SizeToAxis(axis, logits_grad->dims());
const int d = phi::funcs::SizeFromAxis(axis, logits_grad->dims());
Tensor logits_grad_2d, loss_grad_1d, backprop_2d;
......
......@@ -38,13 +38,13 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> {
Tensor* softmax = context.Output<Tensor>("Softmax");
Tensor* loss = context.Output<Tensor>("Loss");
const int rank = logits->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
PADDLE_ENFORCE_EQ(axis, rank - 1, platform::errors::InvalidArgument(
"axis should == rank - 1"));
softmax->mutable_data<T>(context.GetPlace());
loss->mutable_data<T>(context.GetPlace());
const int n = SizeToAxis(axis, logits->dims());
const int d = SizeFromAxis(axis, logits->dims());
const int n = phi::funcs::SizeToAxis(axis, logits->dims());
const int d = phi::funcs::SizeFromAxis(axis, logits->dims());
std::vector<int> logits_dims = phi::vectorize<int>(logits->dims());
const bool soft_label = context.Attr<bool>("soft_label");
......@@ -122,11 +122,11 @@ class SoftmaxWithCrossEntropyGradXPUKernel : public framework::OpKernel<T> {
auto ignore_index = context.Attr<int>("ignore_index");
const int rank = logit_grad->dims().size();
const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
const int axis = phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);
PADDLE_ENFORCE_EQ(axis, rank - 1, platform::errors::InvalidArgument(
"axis should == rank - 1"));
const int n = SizeToAxis(axis, logit_grad->dims());
const int d = SizeFromAxis(axis, logit_grad->dims());
const int n = phi::funcs::SizeToAxis(axis, logit_grad->dims());
const int d = phi::funcs::SizeFromAxis(axis, logit_grad->dims());
auto& dev_ctx =
context.template device_context<platform::XPUDeviceContext>();
......
......@@ -22,7 +22,7 @@ limitations under the License. */
USE_OP(relu);
USE_OP_ITSELF(elementwise_add);
USE_OP(softmax);
USE_OP_ITSELF(softmax);
namespace paddle {
namespace operators {
......
......@@ -220,4 +220,11 @@ class GPUContext : public DeviceContext {
std::unique_ptr<Impl> impl_;
};
// Note: In order to register cuDNN kernels, a GPUDNNContext type is required.
// Currently, cuDNN kernels use GPUContext directly. Because GPUDNNContext is
// only an alias of GPUContext, a kernel function registered under both
// backends with the same name would be instantiated twice as the very same
// GPU function, so cuDNN kernel functions must use distinct names.
using GPUDNNContext = GPUContext;
} // namespace phi
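A small illustration of the duplication the note warns about (illustrative only; requires <type_traits>):

// GPUDNNContext aliases GPUContext, so instantiating a kernel template with
// either context type names the very same function:
auto* gpu_fn    = &phi::SoftmaxRawKernel<float, phi::GPUContext>;
auto* gpudnn_fn = &phi::SoftmaxRawKernel<float, phi::GPUDNNContext>;
static_assert(std::is_same<decltype(gpu_fn), decltype(gpudnn_fn)>::value,
              "same instantiation");
// gpu_fn == gpudnn_fn -- hence the distinct name SoftmaxRawGPUDNNKernel.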
......@@ -50,7 +50,7 @@ enum class Backend : uint8_t {
// the third library backend
MKLDNN,
CUDNN,
GPUDNN, // cuDNN and hipDNN
// end of backend types
NUM_BACKENDS,
......@@ -112,8 +112,8 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case Backend::MKLDNN:
os << "MKLDNN";
break;
case Backend::CUDNN:
os << "CUDNN";
case Backend::GPUDNN:
os << "GPUDNN";
break;
default: {
size_t device_type_id_ = static_cast<size_t>(backend) -
......@@ -145,8 +145,8 @@ inline Backend StringToBackend(const char* backend_cstr) {
return Backend::NPU;
} else if (s == std::string("MKLDNN")) {
return Backend::MKLDNN;
} else if (s == std::string("CUDNN")) {
return Backend::CUDNN;
} else if (s == std::string("GPUDNN")) {
return Backend::GPUDNN;
} else {
return static_cast<Backend>(static_cast<size_t>(Backend::NUM_BACKENDS) +
phi::GetOrRegisterGlobalDeviceTypeId(s));
......
......@@ -988,6 +988,18 @@ inline std::ostream& operator<<(std::ostream& os, const float16& a) {
return os;
}
template <typename T>
class MPTypeTrait {
public:
using Type = T;
};
template <>
class MPTypeTrait<float16> {
public:
using Type = float;
};
} // namespace dtype
} // namespace phi
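MPTypeTrait selects the math (accumulation) type for a storage type: float16 data is accumulated in float, every other type in itself. For example (requires <type_traits>):

static_assert(
    std::is_same<phi::dtype::MPTypeTrait<phi::dtype::float16>::Type,
                 float>::value,
    "float16 accumulates in float");
static_assert(
    std::is_same<phi::dtype::MPTypeTrait<double>::Type, double>::value,
    "other types accumulate in themselves");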
......
......@@ -58,7 +58,7 @@ phi::Place TransToPtenPlace(const Backend& backend, bool set_device_id) {
return phi::CPUPlace();
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case phi::Backend::CUDNN:
case phi::Backend::GPUDNN:
return phi::GPUPlace(
set_device_id ? phi::backends::gpu::GetCurrentDeviceId() : 0);
#endif
......
......@@ -15,8 +15,15 @@ set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function i
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
# auto build kernel targets by cmake
register_kernels(DEPS ${COMMON_KERNEL_DEPS})
# NOTE: Some kernels depend on some targets that are not commonly used.
# These targets are not suitable for common dependencies.
# In this case, you need to manually generate them here.
set(MANUAL_BUILD_KERNELS softmax_kernel softmax_grad_kernel)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
# auto parse and build kernel targets by cmake
register_kernels(EXCLUDES ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS})
# phi sparse kernels
add_subdirectory(sparse)
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/softmax_grad_kernel_impl.h"
PD_REGISTER_KERNEL(
softmax_grad, CPU, ALL_LAYOUT, phi::SoftmaxGradKernel, float, double) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/softmax_kernel_impl.h"
PD_REGISTER_KERNEL(
softmax, CPU, ALL_LAYOUT, phi::SoftmaxRawKernel, float, double) {}
......@@ -12,16 +12,43 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/softmax_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
softmax, ops::SoftmaxKernel<plat::CUDADeviceContext, float>,
ops::SoftmaxKernel<plat::CUDADeviceContext, double>,
ops::SoftmaxKernel<plat::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL(
softmax_grad, ops::SoftmaxGradKernel<plat::CUDADeviceContext, float>,
ops::SoftmaxGradKernel<plat::CUDADeviceContext, double>,
ops::SoftmaxGradKernel<plat::CUDADeviceContext, plat::float16>);
#pragma once
#include "paddle/phi/core/ddim.h"
namespace phi {
namespace funcs {
static inline int CanonicalAxis(const int axis, const int rank) {
if (axis < 0) {
return axis + rank;
}
return axis;
}
static inline int SizeToAxis(const int axis, DDim dims) {
int size = 1;
for (int i = 0; i < axis; i++) {
size *= dims[i];
}
return size;
}
static inline int SizeFromAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
static inline int SizeOutAxis(const int axis, DDim dims) {
int size = 1;
for (int i = axis + 1; i < dims.size(); i++) {
size *= dims[i];
}
return size;
}
} // namespace funcs
} // namespace phi
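A worked example of the four helpers (assuming phi::make_ddim from ddim.h), matching how the softmax kernels flatten a tensor into a 2-D view:

phi::DDim dims = phi::make_ddim({2, 3, 4});
const int axis = phi::funcs::CanonicalAxis(/*axis=*/-1, /*rank=*/3);  // 2
const int n = phi::funcs::SizeToAxis(axis, dims);    // 2 * 3 = 6, rows of the 2-D view
const int d = phi::funcs::SizeFromAxis(axis, dims);  // 4, columns of the 2-D view
const int r = phi::funcs::SizeOutAxis(axis, dims);   // 1, product of dims past the axis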
......@@ -92,4 +92,4 @@ static inline phi::DDim ComputeAndCheckShape(
}
} // namespace funcs
} // namespace phi
} // namespace phi
......@@ -55,5 +55,5 @@ struct EigenSub<Eigen::GpuDevice, T> {
template struct EigenSub<Eigen::GpuDevice, float>;
} // namespace fucns
} // namespace funcs
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/softmax_grad_kernel_impl.h"
PD_REGISTER_KERNEL(softmax_grad,
GPU,
ALL_LAYOUT,
phi::SoftmaxGradKernel,
float,
double,
phi::dtype::float16) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/softmax_kernel_impl.h"
PD_REGISTER_KERNEL(softmax,
GPU,
ALL_LAYOUT,
phi::SoftmaxRawKernel,
float,
double,
phi::dtype::float16) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxGradGPUDNNKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& out_grad,
int axis,
DenseTensor* x_grad) {
dev_ctx.template Alloc<T>(x_grad);
SoftmaxBackwardCUDAKernelDriver<T>(dev_ctx, out, out_grad, axis, x_grad);
}
} // namespace phi
#ifdef PADDLE_WITH_HIP
PD_REGISTER_KERNEL(softmax_grad,
GPUDNN,
ALL_LAYOUT,
phi::SoftmaxGradGPUDNNKernel,
float,
phi::dtype::float16) {}
#else
PD_REGISTER_KERNEL(softmax_grad,
GPUDNN,
ALL_LAYOUT,
phi::SoftmaxGradGPUDNNKernel,
float,
double,
phi::dtype::float16) {}
#endif
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/gpudnn/softmax_gpudnn.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxRawGPUDNNKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
DenseTensor* out) {
dev_ctx.template Alloc<T>(out);
SoftmaxForwardCUDAKernelDriver<T>(dev_ctx, x, axis, out);
}
} // namespace phi
#ifdef PADDLE_WITH_HIP
PD_REGISTER_KERNEL(softmax,
GPUDNN,
ALL_LAYOUT,
phi::SoftmaxRawGPUDNNKernel,
float,
phi::dtype::float16) {}
#else
PD_REGISTER_KERNEL(softmax,
GPUDNN,
ALL_LAYOUT,
phi::SoftmaxRawGPUDNNKernel,
float,
double,
phi::dtype::float16) {}
#endif
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/kernels/softmax_grad_kernel.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& out_grad,
int axis,
DenseTensor* x_grad) {
const int rank = x_grad->dims().size();
const int calc_axis = phi::funcs::CanonicalAxis(axis, rank);
int axis_dim = x_grad->dims()[calc_axis];
// allocate memory on device.
dev_ctx.template Alloc<T>(x_grad);
if (x_grad->numel() == 0) {
return;
}
const int n = phi::funcs::SizeToAxis(calc_axis, x_grad->dims());
const int d = phi::funcs::SizeFromAxis(calc_axis, x_grad->dims());
DenseTensor dX_2d, Out_2d, dOut_2d;
dX_2d.ShareDataWith(*x_grad).Resize({n, d});
Out_2d.ShareDataWith(out).Resize({n, d});
dOut_2d.ShareDataWith(out_grad).Resize({n, d});
paddle::operators::math::SoftmaxGradFunctor<Context, T>()(
dev_ctx, axis_dim, &Out_2d, &dOut_2d, &dX_2d);
}
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxRawKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
DenseTensor* out) {
const int rank = x.dims().size();
const int calc_axis = phi::funcs::CanonicalAxis(axis, rank);
int axis_dim = x.dims()[calc_axis];
// allocate memory on device.
dev_ctx.template Alloc<T>(out);
if (out->numel() == 0) {
return;
}
const int n = phi::funcs::SizeToAxis(calc_axis, x.dims());
const int d = phi::funcs::SizeFromAxis(calc_axis, x.dims());
DenseTensor X_2d, Out_2d;
X_2d.ShareDataWith(x).Resize({n, d});
Out_2d.ShareDataWith(*out).Resize({n, d});
paddle::operators::math::SoftmaxFunctor<Context, T, false>()(
dev_ctx, axis_dim, &X_2d, &Out_2d);
}
} // namespace phi
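For reference, when the softmax axis is the innermost dimension (so d == axis_dim), the functor computes, for each row i of the n x d view:

\mathrm{softmax}(x)_{ij} = \frac{e^{x_{ij} - m_i}}{\sum_{k=1}^{d} e^{x_{ik} - m_i}}, \qquad m_i = \max_k x_{ik}

where subtracting the row maximum m_i is the usual numerical-stability shift; when the axis is not last, the functor instead normalizes over groups of axis_dim elements within each row.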
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/cast_kernel.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& out_grad,
int axis,
DenseTensor* x_grad);
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/cast_kernel.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxRawKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void SoftmaxKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
DataType dtype,
DenseTensor* out) {
auto cast_x = phi::Cast<T, Context>(dev_ctx, x, dtype);
phi::SoftmaxRawKernel<T, Context>(dev_ctx, cast_x, axis, out);
}
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature SoftmaxOpArgumentMapping(const ArgumentMappingContext& ctx) {
return KernelSignature("softmax", {"X"}, {"axis"}, {"Out"});
}
KernelSignature SoftmaxGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("softmax_grad",
{"Out", GradVarName("Out")},
{"axis"},
{GradVarName("X")});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(softmax, phi::SoftmaxOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(softmax_grad, phi::SoftmaxGradOpArgumentMapping);
......@@ -41,8 +41,8 @@ TEST(Backend, OStream) {
oss << phi::Backend::MKLDNN;
EXPECT_EQ(oss.str(), "MKLDNN");
oss.str("");
oss << phi::Backend::CUDNN;
EXPECT_EQ(oss.str(), "CUDNN");
oss << phi::Backend::GPUDNN;
EXPECT_EQ(oss.str(), "GPUDNN");
oss.str("");
try {
oss << phi::Backend::NUM_BACKENDS;
......@@ -60,7 +60,7 @@ TEST(Backend, StringToBackend) {
EXPECT_EQ(phi::Backend::XPU, pexp::StringToBackend("XPU"));
EXPECT_EQ(phi::Backend::NPU, pexp::StringToBackend("NPU"));
EXPECT_EQ(phi::Backend::MKLDNN, pexp::StringToBackend("MKLDNN"));
EXPECT_EQ(phi::Backend::CUDNN, pexp::StringToBackend("CUDNN"));
EXPECT_EQ(phi::Backend::GPUDNN, pexp::StringToBackend("GPUDNN"));
EXPECT_EQ(static_cast<phi::Backend>(
static_cast<size_t>(phi::Backend::NUM_BACKENDS) + 1),
pexp::StringToBackend("CustomBackend"));
......