Unverified commit 78ecb668, authored by W wuhuanzhou, committed by GitHub

optimize OP's compilation time (#32617)

* optimize OP's compilation time, test=develop

* add more op and run ci test, test=develop

* CUDA Kernel register in cc file, test=develop

* fix macros, test=develop

* fix undefined symbol error, test=develop

* fix compilation error and undefined symbol, test=develop

* fix compilation error on Windows, test=develop

* fix compilation error on Windows, test=develop
Parent 5c79dbb2
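Note on the pattern: every change below follows the same recipe. The heavy Eigen expression templates are compiled once into a dedicated eigen_function library, and CUDA kernel registration moves out of per-op .cu files into the .cc files, where REGISTER_OP_CUDA_KERNEL compiles away in CPU-only builds. A minimal sketch of the resulting layout (the my_op names are hypothetical, for illustration only):

// my_op.cc -- CPU and CUDA registration now live in one file
#include "paddle/fluid/operators/my_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CPU_KERNEL(
    my_op, ops::MyOpKernel<paddle::platform::CPUDeviceContext, float>);
// Expands to nothing unless PADDLE_WITH_CUDA or PADDLE_WITH_HIP is defined:
REGISTER_OP_CUDA_KERNEL(
    my_op, ops::MyOpKernel<paddle::platform::CUDADeviceContext, float>);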
@@ -171,7 +171,7 @@ if (WITH_MKLDNN)
cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util)
cc_test(test_fc_act_mkldnn_fuse_pass SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc DEPS fc_act_mkldnn_fuse_pass pass_test_util)
cc_test(test_batch_norm_act_fuse_pass SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc DEPS batch_norm_act_fuse_pass pass_test_util)
set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context eigen_function)
if (WITH_GPU OR WITH_ROCM)
set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv)
endif()
......
@@ -15,4 +15,4 @@ cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_
cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass)
cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op eigen_function)
@@ -317,8 +317,12 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
#else
#define REGISTER_OP_CUDA_KERNEL(op_type, ...)
#endif
#define REGISTER_OP_CPU_KERNEL(op_type, ...) \
REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
......
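The empty #else branch above is what lets a .cc file keep its REGISTER_OP_CUDA_KERNEL call in a CPU-only build: the macro is variadic, so the kernel template arguments (embedded commas included) are collected into __VA_ARGS__ and discarded unexpanded, and the CUDA kernel types are never instantiated. A standalone toy illustration (hypothetical names, not Paddle code):

#include <cstdio>
// Same guard shape as above, reduced to a toy:
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#define REGISTER_OP_CUDA_KERNEL(op_type, ...) /* real registration */
#else
#define REGISTER_OP_CUDA_KERNEL(op_type, ...)
#endif
struct CudaCtx;  // only declared, never defined
template <typename Ctx, typename T>
struct CropKernel;  // only declared, never defined
// In a CPU-only build this line expands to nothing, so the incomplete
// types above are never needed:
REGISTER_OP_CUDA_KERNEL(crop, CropKernel<CudaCtx, float>);
int main() { std::printf("CPU-only build compiles and links\n"); }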
@@ -115,9 +115,9 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_fun
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper boost ps_gpu_wrapper)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function)
if (WITH_GPU OR WITH_ROCM)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu bert_encoder_functor)
endif()
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} device_memory_aligment)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} layer)
......
cc_test(op_tester SRCS op_tester.cc op_tester_config.cc
DEPS memory timer framework_proto proto_desc lod_tensor op_registry
device_context scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} eigen_function)
@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/fluid/framework/conv_search_cache.h"
#include "paddle/fluid/framework/operator_kernel_configs.h"
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/cudnn_desc.h"
namespace paddle {
namespace operators {
@@ -58,8 +59,8 @@ static void RemovePaddingSlice(const framework::ExecutionContext& context,
*context.template device_context<DeviceContext>().eigen_device();
auto in_dims = input->dims();
auto new_out_dims = out->dims();
auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
for (size_t i = 0; i < D; ++i) {
offsets[i] = 0;
extents[i] = new_out_dims[i];
@@ -81,7 +82,8 @@ static void RemovePaddingSlice(const framework::ExecutionContext& context,
auto out_t =
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*out, new_out_dims);
EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
offsets, extents);
}
template <typename T>
......
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/concat_and_split.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
@@ -40,8 +41,8 @@ static void Slice(const framework::ExecutionContext& context,
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
auto in_dims = input->dims();
auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
for (size_t i = 0; i < D; ++i) {
offsets[i] = 0;
extents[i] = in_dims[i];
@@ -64,7 +65,8 @@ static void Slice(const framework::ExecutionContext& context,
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*out, out_dims);
EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
offsets, extents);
out->Resize(out_dims);
}
......
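The EigenSlice<...>::Eval indirection above (and EigenPad below) is the core of the compile-time optimization: the functor is only declared in eigen/eigen_function.h, and its body, which triggers the expensive Eigen instantiation, is compiled exactly once inside the eigen_function library via explicit instantiation. A reduced sketch of that firewall pattern, with hypothetical names (Fill, CpuDevice):

// functor.h -- declaration only, cheap for every op to include
template <typename Device, typename T>
struct Fill {
  static void Eval(const Device& dev, T* data, int n, T value);
};
// functor.cc -- the single translation unit that compiles the heavy body
template <typename Device, typename T>
void Fill<Device, T>::Eval(const Device& dev, T* data, int n, T value) {
  for (int i = 0; i < n; ++i) data[i] = value;  // stands in for an Eigen expression
}
struct CpuDevice {};
template struct Fill<CpuDevice, float>;  // explicit instantiation
// some_op.cc -- includes only functor.h, links against functor.o:
//   Fill<CpuDevice, float>::Eval(dev, ptr, n, 0.f);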
@@ -220,3 +220,10 @@ REGISTER_OP_CPU_KERNEL(
REGISTER_OP_CPU_KERNEL(
crop_grad, ops::CropGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
crop, ops::CropKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
crop_grad, ops::CropGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropGradKernel<paddle::platform::CUDADeviceContext, double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/crop_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
crop, ops::CropKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
crop_grad, ops::CropGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropGradKernel<paddle::platform::CUDADeviceContext, double>);
@@ -17,6 +17,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace paddle {
@@ -89,15 +90,16 @@ void CropFunction(const framework::ExecutionContext& context) {
auto x_tensor = EigenTensor<T, D>::From(*x);
auto out_tensor = EigenTensor<T, D>::From(*out);
Eigen::DSizes<Eigen::DenseIndex, D> e_offsets;
Eigen::DSizes<Eigen::DenseIndex, D> e_shape;
for (size_t i = 0; i < D; ++i) {
e_offsets[i] = offsets[i];
e_shape[i] = out->dims()[i];
}
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
place, out_tensor, x_tensor, e_offsets, e_shape);
}
template <typename DeviceContext, typename T>
@@ -148,16 +150,17 @@ void CropGradFunction(const framework::ExecutionContext& context) {
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
d_x->mutable_data<T>(x->dims(), context.GetPlace());
auto offsets = GetOffsets(context);
Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
for (size_t i = 0; i < D; ++i) {
paddings[i].first = offsets[i];
paddings[i].second = d_x->dims()[i] - d_out->dims()[i] - offsets[i];
}
auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
place, d_x_tensor, d_out_tensor, paddings, static_cast<T>(0));
}
}
......
@@ -319,3 +319,16 @@ REGISTER_OP_CPU_KERNEL(
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
crop_tensor,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, double>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
crop_tensor_grad,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/crop_tensor_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
crop_tensor,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, double>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int>,
ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
crop_tensor_grad,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
@@ -17,6 +17,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/operators/strided_memcpy.h"
namespace paddle {
@@ -199,15 +200,16 @@ void CropTensorFunction(const framework::ExecutionContext& context) {
auto x_tensor = EigenTensor<T, D>::From(*x);
auto out_tensor = EigenTensor<T, D>::From(*out);
Eigen::DSizes<Eigen::DenseIndex, D> e_offsets;
Eigen::DSizes<Eigen::DenseIndex, D> e_shape;
for (size_t i = 0; i < D; ++i) {
e_offsets[i] = offsets[i];
e_shape[i] = out->dims()[i];
}
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
place, out_tensor, x_tensor, e_offsets, e_shape);
}
template <typename DeviceContext, typename T>
@@ -259,16 +261,17 @@ void CropTensorGradFunction(const framework::ExecutionContext& context) {
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
d_x->mutable_data<T>(x->dims(), context.GetPlace());
auto offsets = GetOffsets(context);
Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
for (size_t i = 0; i < D; ++i) {
paddings[i].first = offsets[i];
paddings[i].second = d_x->dims()[i] - d_out->dims()[i] - offsets[i];
}
auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
auto& place =
*context.template device_context<DeviceContext>().eigen_device();
EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
place, d_x_tensor, d_out_tensor, paddings, static_cast<T>(0));
}
}
......
file(GLOB EIGEN_CC_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
file(GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu")
if(WITH_GPU)
nv_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3)
elseif(WITH_ROCM)
hip_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3)
else()
cc_library(eigen_function SRCS ${EIGEN_CC_SOURCES} DEPS eigen3)
endif()
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -11,13 +11,21 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenConstant<Eigen::DefaultDevice, T, Rank> {
using Type = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, Type out, const T value) {
out.device(dev) = out.constant(value);
}
};
template struct EigenConstant<Eigen::DefaultDevice, float, 1>;
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -11,12 +11,21 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenConstant<Eigen::GpuDevice, T, Rank> {
using Type = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, Type out, const T value) {
out.device(dev) = out.constant(value);
}
};
template struct EigenConstant<Eigen::GpuDevice, float, 1>;
} // namespace operators
} // namespace paddle
@@ -12,6 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include "unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
namespace paddle { namespace paddle {
...@@ -48,5 +54,187 @@ struct EigenBroadcastGrad { ...@@ -48,5 +54,187 @@ struct EigenBroadcastGrad {
const Array& reduce_dims, const Array2& reshape_dims); const Array& reduce_dims, const Array2& reshape_dims);
}; };
template <typename EigenDevice, typename T, int Rank>
struct EigenConstant {
using Type = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, Type out, const T value);
};
template <typename EigenDevice, typename T>
struct EigenSign {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in);
};
template <typename EigenDevice, typename T, int Rank>
struct EigenReverse {
using Array = Eigen::DSizes<bool, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in,
const Array& reverse);
};
template <typename EigenDevice, typename T>
struct EigenAdd {
using InType = Eigen::TensorMap<Eigen::TensorFixedSize<
const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in,
const T value);
};
template <typename EigenDevice, typename T>
struct EigenSub {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& left,
const InType& right);
};
template <typename EigenDevice, typename T, int Rank>
struct EigenSlice {
using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
using Array32Bit = Eigen::DSizes<int, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in,
const Array& offsets, const Array& extents);
static void Eval(const EigenDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& offsets,
const Array32Bit& extents);
};
template <typename EigenDevice, typename T, int Rank>
struct EigenPad {
using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
using Array32Bit = std::array<std::pair<int, int>, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in,
const Array& padding, const T value);
static void Eval(const EigenDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& padding,
const T value);
};
template <typename EigenDevice, typename T>
struct EigenScale {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in,
const T scale, const T bias, const bool bias_after_scale);
};
template <typename EigenDevice, typename T>
struct EigenErf {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in);
};
template <typename EigenDevice, typename T>
struct EigenErfGrad {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType din, const InType& in,
const InType& dout);
};
template <typename EigenDevice, typename T>
struct EigenRankLoss {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& label,
const InType& left, const InType& right);
};
template <typename EigenDevice, typename T>
struct EigenRankLossGrad {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void EvalLeft(const EigenDevice& dev, OutType dleft,
const InType& dout, const InType& label,
const InType& left, const InType& right);
static void EvalRight(const EigenDevice& dev, OutType dright,
const InType& dout, const InType& label,
const InType& left, const InType& right);
};
template <typename EigenDevice, typename T>
struct EigenHingeLoss {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType loss, const InType& pred,
const InType& label);
};
template <typename EigenDevice, typename T>
struct EigenHingeLossGrad {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType dpred, const InType& dloss,
const InType& pred, const InType& label);
};
template <typename EigenDevice, typename T>
struct EigenL1Norm {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType out, const InType& in);
};
template <typename EigenDevice, typename T>
struct EigenL1NormGrad {
using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const EigenDevice& dev, OutType din, const InType& dout,
const InType& in, const Array& bcast);
};
} // namespace operators
} // namespace paddle
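For context, op headers consume these declarations roughly as follows (a hedged sketch modeled on the sign op; the surrounding kernel code is an assumption, not part of this hunk). The op's translation unit sees only the functor declaration and links against the instantiation compiled into eigen_function:

// sketch of an op kernel calling an eigen_function functor
template <typename DeviceContext, typename T>
class SignKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* out = context.Output<framework::Tensor>("Out");
    auto* in = context.Input<framework::Tensor>("X");
    out->mutable_data<T>(context.GetPlace());
    auto eigen_out = framework::EigenVector<T>::Flatten(*out);
    auto eigen_in = framework::EigenVector<T>::Flatten(*in);
    auto& dev =
        *context.template device_context<DeviceContext>().eigen_device();
    // Heavy Eigen code lives in eigen_function; this TU only links to it.
    EigenSign<std::decay_t<decltype(dev)>, T>::Eval(dev, eigen_out, eigen_in);
  }
};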
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenAdd<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<Eigen::TensorFixedSize<
const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in, const T value) {
out.device(dev) = in + value;
}
};
template struct EigenAdd<Eigen::DefaultDevice, float>;
template struct EigenAdd<Eigen::DefaultDevice, double>;
template struct EigenAdd<Eigen::DefaultDevice, int>;
template struct EigenAdd<Eigen::DefaultDevice, int64_t>;
template <typename T>
struct EigenSub<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& left, const InType& right) {
out.device(dev) = left - right;
}
};
template struct EigenSub<Eigen::DefaultDevice, float>;
} // namespace operators
} // namespace paddle
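EigenAdd and EigenSub above are rank-0 (scalar) operations: Eval is just out = in + value over a TensorFixedSize with empty Sizes<>. A standalone illustration in raw Eigen of what Eval evaluates (assumes an Eigen checkout; not Paddle code):

#include <unsupported/Eigen/CXX11/Tensor>
#include <cstdio>
int main() {
  Eigen::DefaultDevice dev;
  Eigen::TensorFixedSize<float, Eigen::Sizes<>> in, out;
  in.data()[0] = 41.0f;          // rank-0 tensor: exactly one coefficient
  out.device(dev) = in + 1.0f;   // what EigenAdd<...>::Eval(dev, out, in, 1.0f) computes
  std::printf("%g\n", out.data()[0]);  // prints 42
}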
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenAdd<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<Eigen::TensorFixedSize<
const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
const T value) {
out.device(dev) = in + value;
}
};
template struct EigenAdd<Eigen::GpuDevice, float>;
template struct EigenAdd<Eigen::GpuDevice, double>;
template struct EigenAdd<Eigen::GpuDevice, int>;
template struct EigenAdd<Eigen::GpuDevice, int64_t>;
template <typename T>
struct EigenSub<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& left,
const InType& right) {
out.device(dev) = left - right;
}
};
template struct EigenSub<Eigen::GpuDevice, float>;
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/eigen_ext.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenErf<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in) {
out.device(dev) = in.erf();
}
};
template <typename T>
struct EigenErfGrad<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType din,
const InType& in, const InType& dout) {
din.device(dev) =
dout * static_cast<T>(M_2_SQRTPI) * (-(in.square())).exp();
}
};
#define INSTANTIATION(FUNCTOR) \
template struct FUNCTOR<Eigen::DefaultDevice, float>; \
template struct FUNCTOR<Eigen::DefaultDevice, double>; \
template struct FUNCTOR<Eigen::DefaultDevice, platform::float16>
INSTANTIATION(EigenErf);
INSTANTIATION(EigenErfGrad);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
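For reference (a derivation check, not text from the diff): M_2_SQRTPI is the C math constant $2/\sqrt{\pi}$, which is why the backward pass above is a single product, and why eigen_function.h defines _USE_MATH_DEFINES for MSVC:

$$\operatorname{erf}(x)=\frac{2}{\sqrt{\pi}}\int_{0}^{x}e^{-t^{2}}\,dt,\qquad \frac{d}{dx}\operatorname{erf}(x)=\frac{2}{\sqrt{\pi}}\,e^{-x^{2}},\qquad \text{so}\quad d_{\mathrm{in}}=d_{\mathrm{out}}\cdot\frac{2}{\sqrt{\pi}}\,e^{-\mathrm{in}^{2}}.$$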
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/eigen_ext.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenErf<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
out.device(dev) = in.erf();
}
};
template <typename T>
struct EigenErfGrad<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType din, const InType& in,
const InType& dout) {
din.device(dev) =
dout * static_cast<T>(M_2_SQRTPI) * (-(in.square())).exp();
}
};
#define INSTANTIATION(FUNCTOR) \
template struct FUNCTOR<Eigen::GpuDevice, float>; \
template struct FUNCTOR<Eigen::GpuDevice, double>; \
template struct FUNCTOR<Eigen::GpuDevice, platform::float16>
INSTANTIATION(EigenErf);
INSTANTIATION(EigenErfGrad);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenL1Norm<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in) {
out.device(dev) = in.abs().sum();
}
};
template <typename T>
struct EigenL1NormGrad<Eigen::DefaultDevice, T> {
using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType din,
const InType& dout, const InType& in, const Array& bcast) {
din.device(dev) = dout.broadcast(bcast) * in.sign();
}
};
template struct EigenL1Norm<Eigen::DefaultDevice, float>;
template struct EigenL1NormGrad<Eigen::DefaultDevice, float>;
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenL1Norm<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<
T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
out.device(dev) = in.abs().sum();
}
};
template <typename T>
struct EigenL1NormGrad<Eigen::GpuDevice, T> {
using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType din, const InType& dout,
const InType& in, const Array& bcast) {
din.device(dev) = dout.broadcast(bcast) * in.sign();
}
};
template struct EigenL1Norm<Eigen::GpuDevice, float>;
template struct EigenL1NormGrad<Eigen::GpuDevice, float>;
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenRankLoss<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& label, const InType& left,
const InType& right) {
out.device(dev) =
(1.0f + (left - right).exp()).log() - label * (left - right);
}
};
template <typename T>
struct EigenRankLossGrad<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void EvalLeft(const Eigen::DefaultDevice& dev, OutType dleft,
const InType& dout, const InType& label,
const InType& left, const InType& right) {
dleft.device(dev) = dout * (1.0f / (1.0f + (right - left).exp()) - label);
}
static void EvalRight(const Eigen::DefaultDevice& dev, OutType dright,
const InType& dout, const InType& label,
const InType& left, const InType& right) {
dright.device(dev) = -dout * (1.0f / (1.0f + (right - left).exp()) - label);
}
};
template struct EigenRankLoss<Eigen::DefaultDevice, float>;
template struct EigenRankLossGrad<Eigen::DefaultDevice, float>;
template <typename T>
struct EigenHingeLoss<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType loss,
const InType& pred, const InType& label) {
loss.device(dev) = (static_cast<T>(1) -
pred * (static_cast<T>(2) * label - static_cast<T>(1)))
.cwiseMax(static_cast<T>(0));
}
};
template <typename T>
struct EigenHingeLossGrad<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType dpred,
const InType& dloss, const InType& pred,
const InType& label) {
auto alt_labels = static_cast<T>(2) * label - static_cast<T>(1);
dpred.device(dev) =
dloss * ((pred * alt_labels) < static_cast<T>(1)).template cast<T>() *
(-alt_labels);
}
};
template struct EigenHingeLoss<Eigen::DefaultDevice, float>;
template struct EigenHingeLossGrad<Eigen::DefaultDevice, float>;
} // namespace operators
} // namespace paddle
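A quick consistency check on the two losses above (derived here, hedged): with label $y$ and scores $l, r$, the rank loss and its gradients are

$$L=\log\!\left(1+e^{\,l-r}\right)-y\,(l-r),\qquad \frac{\partial L}{\partial l}=\frac{1}{1+e^{\,r-l}}-y,\qquad \frac{\partial L}{\partial r}=-\frac{\partial L}{\partial l},$$

matching EvalLeft and EvalRight. For the hinge loss, with alt_labels $=2y-1\in\{-1,+1\}$, the forward pass is $\max\bigl(0,\,1-\hat{y}\,(2y-1)\bigr)$ and the backward pass masks dloss with the indicator $[\hat{y}\,(2y-1)<1]$ times $-(2y-1)$.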
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenRankLoss<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out,
const InType& label, const InType& left,
const InType& right) {
out.device(dev) =
(1.0f + (left - right).exp()).log() - label * (left - right);
}
};
template <typename T>
struct EigenRankLossGrad<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void EvalLeft(const Eigen::GpuDevice& dev, OutType dleft,
const InType& dout, const InType& label,
const InType& left, const InType& right) {
dleft.device(dev) = dout * (1.0f / (1.0f + (right - left).exp()) - label);
}
static void EvalRight(const Eigen::GpuDevice& dev, OutType dright,
const InType& dout, const InType& label,
const InType& left, const InType& right) {
dright.device(dev) = -dout * (1.0f / (1.0f + (right - left).exp()) - label);
}
};
template struct EigenRankLoss<Eigen::GpuDevice, float>;
template struct EigenRankLossGrad<Eigen::GpuDevice, float>;
template <typename T>
struct EigenHingeLoss<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType loss,
const InType& pred, const InType& label) {
loss.device(dev) = (static_cast<T>(1) -
pred * (static_cast<T>(2) * label - static_cast<T>(1)))
.cwiseMax(static_cast<T>(0));
}
};
template <typename T>
struct EigenHingeLossGrad<Eigen::GpuDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType dpred,
const InType& dloss, const InType& pred,
const InType& label) {
auto alt_labels = static_cast<T>(2) * label - static_cast<T>(1);
dpred.device(dev) =
dloss * ((pred * alt_labels) < static_cast<T>(1)).template cast<T>() *
(-alt_labels);
}
};
template struct EigenHingeLoss<Eigen::GpuDevice, float>;
template struct EigenHingeLossGrad<Eigen::GpuDevice, float>;
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenPad<Eigen::DefaultDevice, T, Rank> {
using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
using Array32Bit = std::array<std::pair<int, int>, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in, const Array& padding, const T value) {
out.device(dev) = in.pad(padding, value);
}
static void Eval(const Eigen::DefaultDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& padding,
const T value) {
out.device(dev) = in.pad(padding, value);
}
};
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
INSTANTIATION(EigenPad, int);
INSTANTIATION(EigenPad, int64_t);
INSTANTIATION(EigenPad, float);
INSTANTIATION(EigenPad, double);
INSTANTIATION(EigenPad, platform::complex64);
INSTANTIATION(EigenPad, platform::complex128);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenPad<Eigen::GpuDevice, T, Rank> {
using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
using Array32Bit = std::array<std::pair<int, int>, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
const Array& padding, const T value) {
out.device(dev) = in.pad(padding, value);
}
static void Eval(const Eigen::GpuDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& padding,
const T value) {
out.device(dev) = in.pad(padding, value);
}
};
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
INSTANTIATION(EigenPad, int);
INSTANTIATION(EigenPad, int64_t);
INSTANTIATION(EigenPad, float);
INSTANTIATION(EigenPad, double);
INSTANTIATION(EigenPad, platform::float16);
INSTANTIATION(EigenPad, platform::complex64);
INSTANTIATION(EigenPad, platform::complex128);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
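Note (an inference from the commit history, not stated in the diff): eigen_function contains only the explicit instantiations listed in these INSTANTIATION blocks, so a kernel that calls EigenPad with an unlisted (device, type, rank) combination compiles in its own translation unit but fails at link time. That is presumably the "undefined symbol" class of failure the commit message reports fixing; note, for instance, that platform::float16 padding is instantiated here for Eigen::GpuDevice but not in the CPU file above.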
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenReverse<Eigen::DefaultDevice, T, Rank> {
using Array = Eigen::DSizes<bool, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in, const Array& reverse) {
out.device(dev) = in.reverse(reverse);
}
};
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
INSTANTIATION(EigenReverse, int);
INSTANTIATION(EigenReverse, uint8_t);
INSTANTIATION(EigenReverse, int64_t);
INSTANTIATION(EigenReverse, bool);
INSTANTIATION(EigenReverse, float);
INSTANTIATION(EigenReverse, double);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenReverse<Eigen::GpuDevice, T, Rank> {
using Array = Eigen::DSizes<bool, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
const Array& reverse) {
out.device(dev) = in.reverse(reverse);
}
};
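// Same functor body as the CPU specialization in the preceding file; keeping
// it here confines the device-side instantiation to a single .cu file.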
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
INSTANTIATION(EigenReverse, int);
INSTANTIATION(EigenReverse, uint8_t);
INSTANTIATION(EigenReverse, int64_t);
INSTANTIATION(EigenReverse, bool);
INSTANTIATION(EigenReverse, float);
INSTANTIATION(EigenReverse, double);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/bfloat16.h"
namespace paddle {
namespace operators {
template <typename T>
struct EigenScale<Eigen::DefaultDevice, T> {
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType =
Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in, const T scale, const T bias,
const bool bias_after_scale) {
if (bias_after_scale) {
out.device(dev) = scale * in + bias;
} else {
out.device(dev) = scale * (in + bias);
}
}
};
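// Usage sketch (mirroring the call site in scale_op.h): a kernel holding
// flattened EigenVector maps dispatches on the device type, e.g.
//   EigenScale<std::decay_t<decltype(dev)>, T>::Eval(
//       dev, eigen_out, eigen_in, scale, bias, bias_after_scale);
// and relies on the explicit instantiations below for the definition.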
template struct EigenScale<Eigen::DefaultDevice, float>;
template struct EigenScale<Eigen::DefaultDevice, double>;
template struct EigenScale<Eigen::DefaultDevice, platform::bfloat16>;
template struct EigenScale<Eigen::DefaultDevice, uint8_t>;
template struct EigenScale<Eigen::DefaultDevice, int8_t>;
template struct EigenScale<Eigen::DefaultDevice, int16_t>;
template struct EigenScale<Eigen::DefaultDevice, int>;
template struct EigenScale<Eigen::DefaultDevice, int64_t>;
} // namespace operators
} // namespace paddle
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -11,22 +11,36 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/scale_op.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/platform/float16.h"
-namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    scale,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, double>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   uint8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int16_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int64_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   plat::float16>);
+namespace paddle {
+namespace operators {
+template <typename T>
+struct EigenScale<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const T scale, const T bias, const bool bias_after_scale) {
+    if (bias_after_scale) {
+      out.device(dev) = scale * in + bias;
+    } else {
+      out.device(dev) = scale * (in + bias);
+    }
+  }
+};
+template struct EigenScale<Eigen::GpuDevice, float>;
+template struct EigenScale<Eigen::GpuDevice, double>;
+template struct EigenScale<Eigen::GpuDevice, uint8_t>;
+template struct EigenScale<Eigen::GpuDevice, int8_t>;
+template struct EigenScale<Eigen::GpuDevice, int16_t>;
+template struct EigenScale<Eigen::GpuDevice, int>;
+template struct EigenScale<Eigen::GpuDevice, int64_t>;
+template struct EigenScale<Eigen::GpuDevice, platform::float16>;
+}  // namespace operators
+}  // namespace paddle
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -11,18 +11,25 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/erf_op.h"
-#include "paddle/fluid/platform/float16.h"
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    erf, ops::ErfKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ErfKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ErfKernel<paddle::platform::CUDADeviceContext,
-                   paddle::platform::float16>);
-REGISTER_OP_CUDA_KERNEL(
-    erf_grad, ops::ErfGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ErfGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ErfGradKernel<paddle::platform::CUDADeviceContext,
-                       paddle::platform::float16>);
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+namespace paddle {
+namespace operators {
+template <typename T>
+struct EigenSign<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in) {
+    out.device(dev) = in.sign();
+  }
+};
+template struct EigenSign<Eigen::DefaultDevice, float>;
+template struct EigenSign<Eigen::DefaultDevice, double>;
+}  // namespace operators
+}  // namespace paddle
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -11,9 +11,27 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/operators/minus_op.h"
-REGISTER_OP_CUDA_KERNEL(
-    minus,
-    paddle::operators::MinusKernel<paddle::platform::CUDADeviceContext, float>);
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/eigen_ext.h"
+#include "paddle/fluid/platform/float16.h"
+namespace paddle {
+namespace operators {
+template <typename T>
+struct EigenSign<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
+    out.device(dev) = in.sign();
+  }
+};
+template struct EigenSign<Eigen::GpuDevice, float>;
+template struct EigenSign<Eigen::GpuDevice, double>;
+template struct EigenSign<Eigen::GpuDevice, platform::float16>;
+}  // namespace operators
+}  // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenSlice<Eigen::DefaultDevice, T, Rank> {
using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
using Array32Bit = Eigen::DSizes<int, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const Eigen::DefaultDevice& dev, OutType out,
const InType& in, const Array& offsets,
const Array& extents) {
out.device(dev) = in.slice(offsets, extents);
}
static void Eval(const Eigen::DefaultDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& offsets,
const Array32Bit& extents) {
out.device(dev) = in.slice(offsets, extents);
}
};
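// Note: the CPU instantiations below cover ranks 1-9 and a wider set of
// element types than the GPU file that follows, which stops at rank 6.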
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 7>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 8>; \
template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 9>
INSTANTIATION(EigenSlice, bool);
INSTANTIATION(EigenSlice, int);
INSTANTIATION(EigenSlice, int8_t);
INSTANTIATION(EigenSlice, uint8_t);
INSTANTIATION(EigenSlice, int16_t);
INSTANTIATION(EigenSlice, int64_t);
INSTANTIATION(EigenSlice, float);
INSTANTIATION(EigenSlice, double);
INSTANTIATION(EigenSlice, platform::float16);
INSTANTIATION(EigenSlice, platform::bfloat16);
INSTANTIATION(EigenSlice, platform::complex64);
INSTANTIATION(EigenSlice, platform::complex128);
INSTANTIATION(EigenSlice, platform::complex<float>);
INSTANTIATION(EigenSlice, platform::complex<double>);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex128.h"
#include "paddle/fluid/platform/complex64.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T, int Rank>
struct EigenSlice<Eigen::GpuDevice, T, Rank> {
using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
using Array32Bit = Eigen::DSizes<int, Rank>;
using InType = Eigen::TensorMap<
Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using InType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
using OutType = Eigen::TensorMap<
Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
using OutType32BitIndex =
Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
Eigen::Aligned>;
static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
const Array& offsets, const Array& extents) {
out.device(dev) = in.slice(offsets, extents);
}
static void Eval(const Eigen::GpuDevice& dev, OutType32BitIndex out,
const InType32BitIndex& in, const Array32Bit& offsets,
const Array32Bit& extents) {
out.device(dev) = in.slice(offsets, extents);
}
};
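// The 32-bit-index overload pairs with framework::To32BitIndex in
// slice_op.h, which narrows the index type when the tensor is small enough;
// presumably because Eigen can emit faster GPU kernels for 32-bit indexing
// (an assumption about the motivation, not stated in this diff).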
#define INSTANTIATION(FUNCTOR, TYPE) \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
INSTANTIATION(EigenSlice, int);
INSTANTIATION(EigenSlice, int64_t);
INSTANTIATION(EigenSlice, float);
INSTANTIATION(EigenSlice, double);
INSTANTIATION(EigenSlice, platform::float16);
INSTANTIATION(EigenSlice, platform::complex64);
INSTANTIATION(EigenSlice, platform::complex128);
#undef INSTANTIATION
} // namespace operators
} // namespace paddle
@@ -130,3 +130,14 @@ REGISTER_OP_CPU_KERNEL(
     ops::ErfGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ErfGradKernel<paddle::platform::CPUDeviceContext,
                        paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    erf, ops::ErfKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ErfKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ErfKernel<paddle::platform::CUDADeviceContext,
+                   paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    erf_grad, ops::ErfGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ErfGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ErfGradKernel<paddle::platform::CUDADeviceContext,
+                       paddle::platform::float16>);
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <cmath>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -35,7 +36,8 @@ class ErfKernel : public framework::OpKernel<T> {
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(place) = eigen_in.erf();
+    EigenErf<std::decay_t<decltype(place)>, T>::Eval(place, eigen_out,
+                                                     eigen_in);
   }
 };
@@ -55,8 +57,8 @@ class ErfGradKernel : public framework::OpKernel<T> {
     auto eigen_dx = framework::EigenVector<T>::Flatten(*dx);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_dx.device(place) =
-        eigen_dout * static_cast<T>(M_2_SQRTPI) * (-(eigen_x.square())).exp();
+    EigenErfGrad<std::decay_t<decltype(place)>, T>::Eval(place, eigen_dx,
+                                                         eigen_x, eigen_dout);
   }
 };
...
@@ -143,3 +143,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     hinge_loss_grad,
     ops::HingeLossGradKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    hinge_loss,
+    ops::HingeLossKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    hinge_loss_grad,
+    ops::HingeLossGradKernel<paddle::platform::CUDADeviceContext, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/hinge_loss_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
hinge_loss,
ops::HingeLossKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
hinge_loss_grad,
ops::HingeLossGradKernel<paddle::platform::CUDADeviceContext, float>);
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -33,9 +34,7 @@ class HingeLossKernel : public framework::OpKernel<T> {
     auto y = framework::EigenVector<T>::Flatten(*label);
     loss->mutable_data<T>(context.GetPlace());
     auto l = framework::EigenVector<T>::Flatten(*loss);
-    l.device(place) =
-        (static_cast<T>(1) - x * (static_cast<T>(2) * y - static_cast<T>(1)))
-            .cwiseMax(static_cast<T>(0));
+    EigenHingeLoss<std::decay_t<decltype(place)>, T>::Eval(place, l, x, y);
   }
 };
@@ -59,10 +58,8 @@ class HingeLossGradKernel : public framework::OpKernel<T> {
     if (dpred) {
       dpred->mutable_data<T>(context.GetPlace());
       auto dx = framework::EigenVector<T>::Flatten(*dpred);
-      auto alt_labels = static_cast<T>(2) * y - static_cast<T>(1);
-      dx.device(place) =
-          dl * ((x * alt_labels) < static_cast<T>(1)).template cast<T>() *
-          (-alt_labels);
+      EigenHingeLossGrad<std::decay_t<decltype(place)>, T>::Eval(place, dx, dl,
+                                                                 x, y);
     }
   }
 };
...
@@ -192,3 +192,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     im2sequence_grad,
     ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    im2sequence,
+    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    im2sequence_grad,
+    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
im2sequence,
ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
im2sequence_grad,
ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/im2col.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -157,7 +158,7 @@ class Im2SequenceGradKernel : public framework::OpKernel<T> {
     auto x_v = framework::EigenVector<T>::Flatten(*d_x);
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    x_v.device(place) = x_v.constant(0.0);
+    EigenConstant<std::decay_t<decltype(place)>, T, 1>::Eval(place, x_v, 0.0);
     auto in_dim = in->dims();
     int batch_size = in_dim[0];
...
@@ -107,3 +107,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, double>,
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, int>,
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    increment, ops::IncrementKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int64_t>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/increment_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
increment, ops::IncrementKernel<paddle::platform::CUDADeviceContext, float>,
ops::IncrementKernel<paddle::platform::CUDADeviceContext, double>,
ops::IncrementKernel<paddle::platform::CUDADeviceContext, int>,
ops::IncrementKernel<paddle::platform::CUDADeviceContext, int64_t>);
@@ -15,6 +15,7 @@
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -30,8 +31,9 @@ class IncrementKernel : public framework::OpKernel<T> {
     out_tensor->mutable_data<T>(context.GetPlace());
     auto& dev =
         *context.template device_context<DeviceContext>().eigen_device();
-    framework::EigenScalar<T>::From(*out_tensor).device(dev) =
-        framework::EigenScalar<T>::From(*x_tensor) + static_cast<T>(step);
+    EigenAdd<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, framework::EigenScalar<T>::From(*out_tensor),
+        framework::EigenScalar<T>::From(*x_tensor), static_cast<T>(step));
   }
 };
...
@@ -91,3 +91,9 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     l1_norm_grad,
     ops::L1NormGradKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    l1_norm, ops::L1NormKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    l1_norm_grad,
+    ops::L1NormGradKernel<paddle::platform::CUDADeviceContext, float>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/l1_norm_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
l1_norm, ops::L1NormKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
l1_norm_grad,
ops::L1NormGradKernel<paddle::platform::CUDADeviceContext, float>);
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -33,7 +34,7 @@ class L1NormKernel : public framework::OpKernel<T> {
     auto &place =
         *context.template device_context<DeviceContext>().eigen_device();
-    out.device(place) = x.abs().sum();
+    EigenL1Norm<std::decay_t<decltype(place)>, T>::Eval(place, out, x);
   }
 };
@@ -59,8 +60,9 @@ class L1NormGradKernel : public framework::OpKernel<T> {
     auto &place =
         *context.template device_context<DeviceContext>().eigen_device();
-    Eigen::DSizes<int, 1> x_dsize(x->numel());
-    dx_eigen.device(place) = d_out_eigen.broadcast(x_dsize) * x_eigen.sign();
+    Eigen::DSizes<Eigen::DenseIndex, 1> x_dsize(x->numel());
+    EigenL1NormGrad<std::decay_t<decltype(place)>, T>::Eval(
+        place, dx_eigen, d_out_eigen, x_eigen, x_dsize);
   }
 };
...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -29,7 +30,7 @@ template <typename DeviceContext, typename T, size_t D>
 void PadFunction(const framework::ExecutionContext& context,
                  const std::vector<int>& pads, const framework::Tensor& src,
                  T pad_value, framework::Tensor* out) {
-  Eigen::array<std::pair<int, int>, D> paddings;
+  std::array<std::pair<int64_t, int64_t>, D> paddings;
   for (size_t i = 0; i < paddings.size(); ++i) {
     paddings[i].first = pads[i * 2];
@@ -41,14 +42,15 @@ void PadFunction(const framework::ExecutionContext& context,
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  out_tensor.device(place) = src_tensor.pad(paddings, pad_value);
+  EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, out_tensor, src_tensor, paddings, pad_value);
 }
 template <typename DeviceContext, typename T, size_t D>
 void PadGradFunction(const framework::ExecutionContext& context,
                      const std::vector<int>& pads, const framework::Tensor& src,
                      framework::Tensor* d_out) {
-  Eigen::array<std::pair<int, int>, D> paddings;
+  std::array<std::pair<int64_t, int64_t>, D> paddings;
   for (size_t i = 0; i < paddings.size(); ++i) {
     paddings[i].first = -pads[i * 2];
     paddings[i].second = -pads[i * 2 + 1];
@@ -58,7 +60,8 @@ void PadGradFunction(const framework::ExecutionContext& context,
   auto src_tensor = EigenTensor<T, D>::From(src);
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  d_out_tensor.device(place) = src_tensor.pad(paddings, static_cast<T>(0));
+  EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, d_out_tensor, src_tensor, paddings, static_cast<T>(0));
 }
 template <typename DeviceContext, typename T>
...
@@ -146,3 +146,6 @@ REGISTER_OPERATOR(minus, ops::MinusOp, ops::MinusOpMaker,
                   ops::MinusGradDescMaker, ops::MinusGradMaker);
 REGISTER_OP_CPU_KERNEL(
     minus, ops::MinusKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    minus, ops::MinusKernel<paddle::platform::CUDADeviceContext, float>);
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -30,9 +31,10 @@ class MinusKernel : public framework::OpKernel<T> {
     out_tensor->mutable_data<T>(context.GetPlace());
     auto& dev =
         *context.template device_context<DeviceContext>().eigen_device();
-    framework::EigenVector<T>::Flatten(*out_tensor).device(dev) =
-        framework::EigenVector<T>::Flatten(*left_tensor) -
-        framework::EigenVector<T>::Flatten(*right_tensor);
+    EigenSub<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, framework::EigenVector<T>::Flatten(*out_tensor),
+        framework::EigenVector<T>::Flatten(*left_tensor),
+        framework::EigenVector<T>::Flatten(*right_tensor));
   }
 };
...
@@ -246,3 +246,18 @@ REGISTER_OP_CPU_KERNEL(
     ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext,
                                    int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    pad_constant_like,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    pad_constant_like_grad,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
+                                   int64_t>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
+                                   double>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pad_constant_like_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
pad_constant_like,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
pad_constant_like_grad,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
int64_t>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
double>);
@@ -174,3 +174,16 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     pad_grad, ops::PadGradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::PadGradKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    pad, ops::PadKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext,
+                   paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    pad_grad, ops::PadGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadGradKernel<paddle::platform::CUDADeviceContext,
+                       paddle::platform::float16>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pad_op.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
pad, ops::PadKernel<paddle::platform::CUDADeviceContext, double>,
ops::PadKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::PadKernel<paddle::platform::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL(
pad_grad, ops::PadGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::PadGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadGradKernel<paddle::platform::CUDADeviceContext, plat::float16>);
@@ -231,3 +231,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     rank_loss_grad,
     ops::RankLossGradKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(rank_loss,
+                        paddle::operators::RankLossKernel<
+                            paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(rank_loss_grad,
+                        paddle::operators::RankLossGradKernel<
+                            paddle::platform::CUDADeviceContext, float>);
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -36,8 +37,8 @@ class RankLossKernel : public framework::OpKernel<T> {
     auto right = framework::EigenVector<T>::Flatten(*right_t);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    out.device(dev) =
-        (1.0f + (left - right).exp()).log() - label * (left - right);
+    EigenRankLoss<std::decay_t<decltype(dev)>, T>::Eval(dev, out, label, left,
+                                                        right);
   }
 };
@@ -65,15 +66,15 @@ class RankLossGradKernel : public framework::OpKernel<T> {
     if (d_left_t) {
       d_left_t->mutable_data<T>(ctx.GetPlace());
       auto d_left = framework::EigenVector<T>::Flatten(*d_left_t);
-      d_left.device(dev) =
-          d_out * (1.0f / (1.0f + (right - left).exp()) - label);
+      EigenRankLossGrad<std::decay_t<decltype(dev)>, T>::EvalLeft(
+          dev, d_left, d_out, label, left, right);
     }
     // compute d_right
     if (d_right_t) {
       d_right_t->mutable_data<T>(ctx.GetPlace());
       auto d_right = framework::EigenVector<T>::Flatten(*d_right_t);
-      d_right.device(dev) =
-          -d_out * (1.0f / (1.0f + (right - left).exp()) - label);
+      EigenRankLossGrad<std::decay_t<decltype(dev)>, T>::EvalRight(
+          dev, d_right, d_out, label, left, right);
     }
   }
 };
...
@@ -145,4 +145,12 @@ REGISTER_OP_CPU_KERNEL(
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, int64_t>,
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, bool>,
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ReverseKernel<paddle::platform::CPUDeviceContext, double>)
+    ops::ReverseKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    reverse, ops::ReverseKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, uint8_t>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, bool>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, double>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reverse_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
reverse, ops::ReverseKernel<paddle::platform::CUDADeviceContext, int>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, uint8_t>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, bool>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, float>,
ops::ReverseKernel<paddle::platform::CUDADeviceContext, double>)
@@ -16,6 +16,7 @@
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -23,7 +24,7 @@ template <typename DeviceContext, typename T, int Rank>
 struct ReverseFunctor {
   void operator()(const DeviceContext& context, const framework::LoDTensor& in,
                   framework::LoDTensor* out, const std::vector<int>& axis) {
-    Eigen::array<bool, Rank> reverse_axis;
+    Eigen::DSizes<bool, Rank> reverse_axis;
     for (int i = 0; i < Rank; ++i) {
       reverse_axis[i] = false;
     }
@@ -37,9 +38,10 @@ struct ReverseFunctor {
     auto in_eigen = framework::EigenTensor<T, Rank>::From(in);
     auto out_eigen = framework::EigenTensor<T, Rank>::From(*out);
-    auto* dev = context.eigen_device();
-    out_eigen.device(*dev) = in_eigen.reverse(reverse_axis);
+    auto& dev = *context.eigen_device();
+    EigenReverse<std::decay_t<decltype(dev)>, T, Rank>::Eval(
+        dev, out_eigen, in_eigen, reverse_axis);
   }
 };
...
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/scale_op.h"
 #include <string>
+#include "paddle/fluid/platform/float16.h"
 namespace paddle {
 namespace framework {
@@ -155,3 +156,18 @@ REGISTER_OP_CPU_KERNEL(
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int16_t>,
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    scale,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, double>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
+                                   uint8_t>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int8_t>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
+                                   int16_t>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
+                                   int64_t>,
+    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
+                                   paddle::platform::float16>);
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -68,11 +69,8 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    if (bias_after_scale) {
-      eigen_out.device(dev) = scale * eigen_in + bias;
-    } else {
-      eigen_out.device(dev) = scale * (eigen_in + bias);
-    }
+    EigenScale<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, eigen_out, eigen_in, scale, bias, bias_after_scale);
   }
 };
...
@@ -14,6 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/sign_op.h"
 #include <memory>
+#include "paddle/fluid/platform/float16.h"
 namespace paddle {
 namespace operators {
@@ -69,3 +70,10 @@ REGISTER_OPERATOR(sign, ops::SignOp, ops::SignOpMaker<float>,
 REGISTER_OP_CPU_KERNEL(
     sign, ops::SignKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SignKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    sign,
+    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext, float>,
+    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext, double>,
+    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext,
+                                  paddle::platform::float16>);
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 namespace paddle {
 namespace operators {
@@ -31,7 +32,8 @@ class SignKernel : public framework::OpKernel<T> {
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(place) = eigen_in.sign();
+    EigenSign<std::decay_t<decltype(place)>, T>::Eval(place, eigen_out,
+                                                      eigen_in);
   }
 };
...
@@ -449,3 +449,28 @@ REGISTER_OP_CPU_KERNEL(
                          paddle::platform::complex64>,
     ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
                          paddle::platform::complex128>);
+REGISTER_OP_CUDA_KERNEL(
+    slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::float16>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::complex64>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::complex128>);
+REGISTER_OP_CUDA_KERNEL(
+    slice_grad,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::float16>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::complex64>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::complex128>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/slice_op.h"
#include "paddle/fluid/platform/float16.h"
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, float>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, double>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, int>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::complex64>,
ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::complex128>);
REGISTER_OP_CUDA_KERNEL(
slice_grad,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext, plat::complex64>,
ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
plat::complex128>);
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/utils.h"
@@ -238,8 +239,8 @@ class SliceKernel : public framework::OpKernel<T> {
     out->mutable_data<T>(context.GetPlace());
     auto new_out_dims = out->dims();
-    auto offsets = Eigen::array<int64_t, D>();
-    auto extents = Eigen::array<int64_t, D>();
+    auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+    auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
     for (size_t i = 0; i < D; ++i) {
       offsets[i] = 0;
       extents[i] = new_out_dims[i];
@@ -268,10 +269,12 @@ class SliceKernel : public framework::OpKernel<T> {
         offsets_32bit[i] = offsets[i];
         extents_32bit[i] = extents[i];
       }
-      framework::To32BitIndex(out_t).device(place) =
-          framework::To32BitIndex(in_t).slice(offsets_32bit, extents_32bit);
+      EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, framework::To32BitIndex(out_t), framework::To32BitIndex(in_t),
+          offsets_32bit, extents_32bit);
     } else {
-      out_t.device(place) = in_t.slice(offsets, extents);
+      EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
+                                                            offsets, extents);
     }
     out->Resize(out_dims);
@@ -624,10 +627,12 @@ class SliceGradKernel : public framework::OpKernel<T> {
         paddings_32bit[i] =
             std::make_pair(paddings[i].first, paddings[i].second);
       }
-      framework::To32BitIndex(d_in_t).device(place) =
-          framework::To32BitIndex(d_out_t).pad(paddings_32bit, T(0));
+      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, framework::To32BitIndex(d_in_t),
+          framework::To32BitIndex(d_out_t), paddings_32bit, static_cast<T>(0));
     } else {
-      d_in_t.device(place) = d_out_t.pad(paddings, T(0));
+      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, d_in_t, d_out_t, paddings, static_cast<T>(0));
     }
   }
 };
...
@@ -24,6 +24,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/bfloat16.h"
@@ -402,8 +403,8 @@ void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
   auto out_dims = out->dims();
   auto in_dims = in->dims();
-  auto offsets = Eigen::array<int, D>();
-  auto extents = Eigen::array<int, D>();
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
   for (size_t i = 0; i < D; ++i) {
     offsets[i] = 0;
     extents[i] = out_dims[i];
@@ -423,7 +424,8 @@ void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
   auto out_t =
       framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
          *out);
-  out_t.device(eigen_place) = in_t.slice(offsets, extents);
+  operators::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+      eigen_place, out_t, in_t, offsets, extents);
 }
 template <typename T>
...