From 09096aebd49b1c07b8c0fea29206413f1ca938cc Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Fri, 1 Jul 2022 21:25:21 -0500
Subject: [PATCH] unify cpu context (#43989)

* unify cpu context
* fix init()
* delete test_device_context
* fix test_scalar
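A quick orientation to the pattern applied across the files below (a sketch, not an excerpt from the patch): `paddle/fluid/platform/device_context.h` turns `CPUDeviceContext` into a plain alias of `phi::CPUContext`, the separate `CPUContext::Init()` entry point is folded into the `phi::CPUContext` constructors, and the duplicated forward declarations and explicit instantiations for `platform::CPUDeviceContext` are deleted so that only the `phi::CPUContext` versions remain. The template argument lists in the sketch are inferred, since the extracted diff below dropped the text inside angle brackets.

```cpp
// Sketch of the unification pattern (template arguments are inferred).
#include "paddle/phi/backends/cpu/cpu_context.h"

namespace paddle {
namespace platform {
// Replaces the old `class CPUDeviceContext : public phi::CPUContext` wrapper
// previously declared in paddle/fluid/platform/device_context.h.
using CPUDeviceContext = phi::CPUContext;
}  // namespace platform
}  // namespace paddle

// Before, CPU functors carried two identical explicit instantiations, e.g.
//   template class BeamSearchFunctor<platform::CPUDeviceContext, float>;
//   template class BeamSearchFunctor<phi::CPUContext, float>;
// After this patch only the phi::CPUContext instantiation remains, and it
// serves both names because the two types are now the same type.
```

The same collapse applies to the SetConstant/Transpose helpers, im2col/vol2col, the BLAS wrappers in blas_impl.h, and the other functors listed in the diffstat below.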
---
 .../elementwise/elementwise_add_op.cc | 6 -
 .../elementwise/elementwise_floordiv_op.cc | 3 -
 .../elementwise/elementwise_max_op.cc | 3 -
 .../elementwise/elementwise_min_op.cc | 3 -
 .../elementwise/elementwise_mod_op.cc | 3 -
 .../elementwise/elementwise_pow_op.cc | 3 -
 .../elementwise/elementwise_sub_op.cc | 3 -
 paddle/fluid/operators/increment_op.cc | 3 -
 paddle/fluid/operators/isfinite_op.cc | 3 -
 paddle/fluid/operators/isfinite_v2_op.cc | 3 -
 paddle/fluid/operators/label_smooth_op.cc | 3 -
 paddle/fluid/operators/math/beam_search.cc | 21 +-
 .../fluid/operators/math/concat_and_split.cc | 8 +-
 .../fluid/operators/math/context_project.cc | 11 +-
 .../fluid/operators/math/cos_sim_functor.cc | 7 -
 paddle/fluid/operators/math/cross_entropy.cc | 9 -
 paddle/fluid/operators/math/gru_compute.cc | 6 -
 paddle/fluid/operators/math/im2col.cc | 30 -
 paddle/fluid/operators/math/math_function.cc | 335 ----
 paddle/fluid/operators/math/maxouting.cc | 5 -
 paddle/fluid/operators/math/sample_prob.cc | 13 +-
 .../operators/math/selected_rows_functor.cc | 86 +--
 .../fluid/operators/math/sequence_padding.cc | 113 ----
 paddle/fluid/operators/math/sequence_scale.cc | 26 -
 paddle/fluid/operators/math/softmax.cc | 7 -
 paddle/fluid/operators/math/vol2col.cc | 250 -------
 .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 1 -
 paddle/fluid/operators/rank_loss_op.cc | 3 -
 .../operators/reduce_ops/frobenius_norm_op.cc | 3 -
 .../operators/reduce_ops/reduce_all_op.cc | 3 -
 .../operators/reduce_ops/reduce_any_op.cc | 3 -
 .../operators/reduce_ops/reduce_prod_op.cc | 3 -
 .../operators/reduce_ops/reduce_sum_op.cc | 3 -
 paddle/fluid/operators/set_value_op.cc | 3 -
 paddle/fluid/platform/device_context.cc | 8 -
 paddle/fluid/platform/device_context.h | 9 +-
 paddle/fluid/platform/transform.h | 24 -
 paddle/infrt/kernel/phi/context_kernels.cc | 1 -
 .../infershaped/infershape_launchers_test.cc | 1 -
 paddle/phi/backends/cpu/cpu_context.cc | 10 +-
 paddle/phi/backends/cpu/cpu_context.h | 6 -
 paddle/phi/kernels/funcs/blas/blas_impl.h | 616 ------
 paddle/phi/kernels/funcs/fc_functor.cc | 2 -
 paddle/phi/kernels/funcs/for_range.h | 16 -
 paddle/phi/kernels/funcs/gru_compute.cc | 185 ------
 paddle/phi/kernels/funcs/lstm_compute.cc | 80 ---
 paddle/phi/kernels/funcs/math_function.cc | 134 ++--
 paddle/phi/kernels/funcs/matrix_inverse.cc | 4 -
 paddle/phi/tests/api/test_sparse_utils_api.cc | 1 -
 paddle/phi/tests/common/test_scalar.cu | 7 -
 paddle/phi/tests/core/CMakeLists.txt | 4 -
 paddle/phi/tests/core/test_device_context.cc | 54 --
 paddle/phi/tests/kernels/test_cast_dev_api.cc | 1 -
 .../phi/tests/kernels/test_concat_dev_api.cc | 1 -
 paddle/phi/tests/kernels/test_conj_dev_api.cc | 1 -
 paddle/phi/tests/kernels/test_copy_dev_api.cc | 1 -
 .../tests/kernels/test_creation_dev_api.cc | 4 -
 paddle/phi/tests/kernels/test_dot_dev_api.cc | 1 -
 .../tests/kernels/test_elementwise_dev_api.cc | 4 -
 .../phi/tests/kernels/test_flatten_dev_api.cc | 1 -
 .../phi/tests/kernels/test_math_function.cc | 1 -
 .../phi/tests/kernels/test_matmul_dev_api.cc | 1 -
 paddle/phi/tests/kernels/test_mean_dev_api.cc | 1 -
 .../phi/tests/kernels/test_reshape_dev_api.cc | 1 -
 .../phi/tests/kernels/test_scale_dev_api.cc | 2 -
 .../kernels/test_sparse_activation_dev_api.cc | 1 -
 .../kernels/test_sparse_conv3d_dev_api.cc | 1 -
 .../test_sparse_elementwise_dev_api.cc | 4 -
 .../tests/kernels/test_sparse_pool_dev_api.cc | 1 -
 .../kernels/test_sparse_utils_dev_api.cc | 6 -
 .../phi/tests/kernels/test_split_dev_api.cc | 1 -
 paddle/phi/tests/kernels/test_sum_dev_api.cc | 1 -
 72 files changed, 61 insertions(+), 2121 deletions(-)
 delete mode 100644 paddle/fluid/operators/math/math_function.cc
 delete mode 100644 paddle/phi/tests/core/test_device_context.cc

diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index c71f6b7c3cd..0123df0006f 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -20,12 +20,6 @@ namespace paddle {
 namespace framework {
 class OpDesc;
 } // namespace framework
-namespace imperative {
-class OpBase;
-} // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
index 422cbd881d2..6a8c986a53c 100644
--- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
 namespace imperative {
 class OpBase;
 } // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cc b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
index 58e9c6d7b4c..1911b5c2de6 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cc
@@ -23,9 +23,6 @@ class OpDesc;
 namespace imperative {
 class OpBase;
 } // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cc b/paddle/fluid/operators/elementwise/elementwise_min_op.cc
index 8b967cb1fe1..9fd70754888 100644
--- a/paddle/fluid/operators/elementwise/elementwise_min_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cc
@@ -23,9 +23,6 @@ class OpDesc;
 namespace imperative {
 class OpBase;
 } // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
index ee67f7e4020..55d6e214d6c 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
@@ -25,9 +25,6 @@ class EmptyGradOpMaker;
 namespace imperative {
 class OpBase;
 } // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op.cc
index c13fba99bdb..fcfee9b4fca 100644
--- a/paddle/fluid/operators/elementwise/elementwise_pow_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.cc
@@ -20,9 +20,6 @@ class OpDesc;
 namespace imperative {
 class OpBase;
 } // namespace imperative
-namespace platform {
-class CPUDeviceContext;
-} // namespace platform
 } // namespace paddle

 namespace paddle {
diff --git
a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc index a9968906fb9..24f0228025f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc @@ -23,9 +23,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/increment_op.cc b/paddle/fluid/operators/increment_op.cc index 7d62bf2d628..3ab6b9f9405 100644 --- a/paddle/fluid/operators/increment_op.cc +++ b/paddle/fluid/operators/increment_op.cc @@ -25,9 +25,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index a7fc4865f78..bcab28df3a1 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -26,9 +26,6 @@ class EmptyGradOpMaker; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc index 314bbf556ae..65857b6d87d 100644 --- a/paddle/fluid/operators/isfinite_v2_op.cc +++ b/paddle/fluid/operators/isfinite_v2_op.cc @@ -34,9 +34,6 @@ namespace operators { template class OverflowKernel; } // namespace operators -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc index ccd4db816bd..873ab62a3d2 100644 --- a/paddle/fluid/operators/label_smooth_op.cc +++ b/paddle/fluid/operators/label_smooth_op.cc @@ -24,9 +24,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index cda085aabe9..2b607ade728 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -13,26 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/math/beam_search.h" - +#include "paddle/phi/backends/cpu/cpu_context.h" namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework {} // namespace framework -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { template -class BeamSearchFunctor { +class BeamSearchFunctor { public: - void operator()(const platform::CPUDeviceContext &context, + void operator()(const phi::CPUContext &context, const framework::LoDTensor *pre_ids, const framework::LoDTensor *pre_scores, const framework::LoDTensor *ids, @@ -308,10 +301,10 @@ class BeamSearchFunctor { } }; -template class BeamSearchFunctor; -template class BeamSearchFunctor; -template class BeamSearchFunctor; -template class BeamSearchFunctor; +template class BeamSearchFunctor; +template class BeamSearchFunctor; +template class BeamSearchFunctor; +template class BeamSearchFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/concat_and_split.cc b/paddle/fluid/operators/math/concat_and_split.cc index 4ce2db1e579..3df69e20019 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/concat_and_split.h" +#include "paddle/fluid/platform/device_context.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #ifdef PADDLE_WITH_ASCEND_CL @@ -28,13 +29,6 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework {} // namespace framework -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/context_project.cc b/paddle/fluid/operators/math/context_project.cc index 927d610e2ce..beee93ae016 100644 --- a/paddle/fluid/operators/math/context_project.cc +++ b/paddle/fluid/operators/math/context_project.cc @@ -13,19 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/context_project.h" - -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle +#include "paddle/phi/backends/cpu/cpu_context.h" namespace paddle { namespace operators { namespace math { -template class ContextProjectFunctor; -template class ContextProjectFunctor; +template class ContextProjectFunctor; +template class ContextProjectFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/cos_sim_functor.cc b/paddle/fluid/operators/math/cos_sim_functor.cc index 4a3da2ef86d..85f012afb50 100644 --- a/paddle/fluid/operators/math/cos_sim_functor.cc +++ b/paddle/fluid/operators/math/cos_sim_functor.cc @@ -14,16 +14,9 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/cos_sim_functor.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { - template struct CosSimDyFunctor { void operator()(const platform::CPUDeviceContext& ctx, diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index 45c7e47b810..17ff6aff6f9 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -17,12 +17,6 @@ limitations under the License. */ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/phi/backends/cpu/cpu_context.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { @@ -129,9 +123,6 @@ void CrossEntropyFunctor::operator()( } } -template class CrossEntropyFunctor; -template class CrossEntropyFunctor; - template class CrossEntropyFunctor; template class CrossEntropyFunctor; } // namespace math diff --git a/paddle/fluid/operators/math/gru_compute.cc b/paddle/fluid/operators/math/gru_compute.cc index d8fa1b5a869..7e543a63afc 100644 --- a/paddle/fluid/operators/math/gru_compute.cc +++ b/paddle/fluid/operators/math/gru_compute.cc @@ -15,12 +15,6 @@ limitations under the License. */ #include "paddle/fluid/operators/math/detail/gru_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index e7ed2cbf675..9192badedcf 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -16,12 +16,6 @@ limitations under the License. */ #include "paddle/fluid/operators/math/im2col_cfo_cpu.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace phi { class CPUContext; } // namespace phi @@ -166,24 +160,12 @@ class Col2ImFunctor; -template class Im2ColFunctor; template class Im2ColFunctor; template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; template class Col2ImFunctor; @@ -353,24 +335,12 @@ class Col2ImFunctor; -template class Im2ColFunctor; template class Im2ColFunctor; template class Im2ColFunctor; -template class Col2ImFunctor; -template class Col2ImFunctor; template class Col2ImFunctor; diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc deleted file mode 100644 index 5eff0a5d457..00000000000 --- a/paddle/fluid/operators/math/math_function.cc +++ /dev/null @@ -1,335 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/math/math_function.h" - -#ifdef PADDLE_WITH_MKLML -#include "paddle/fluid/platform/dynload/mklml.h" -#endif - -#ifdef PADDLE_USE_OPENBLAS -#include -#endif - -#include -#include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/math/math_function_impl.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" -#include "unsupported/Eigen/CXX11/Tensor" - -namespace paddle { -namespace operators { -namespace math { - -using float16 = paddle::platform::float16; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - -#ifdef PADDLE_WITH_XPU -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; -#endif - -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose, \ - RANK>; \ - template struct Transpose, \ - RANK>; - -DEFINE_CPU_TRANS(1); -DEFINE_CPU_TRANS(2); -DEFINE_CPU_TRANS(3); -DEFINE_CPU_TRANS(4); -DEFINE_CPU_TRANS(5); -DEFINE_CPU_TRANS(6); - -template -struct TransposeNormal { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& in, - framework::Tensor* out, - const std::vector& axis) { - const int rank = axis.size(); - auto in_stride = phi::stride(in.dims()); - auto out_stride = phi::stride(out->dims()); - const T* in_ptr = in.data(); - T* out_ptr = out->data(); - - auto transpose_helper = [&](int64_t beg, int64_t end) { - for (int64_t out_idx = beg; out_idx < end; ++out_idx) { - int64_t in_idx = 0; - int64_t tmp_idx = out_idx; - // calculate the input index - for (int i = 0; i < rank; ++i) { - const int64_t coordinate = tmp_idx / out_stride[i]; - tmp_idx -= coordinate * out_stride[i]; - in_idx += coordinate * in_stride[axis[i]]; - } - out_ptr[out_idx] = in_ptr[in_idx]; - } - }; - transpose_helper(0, out->numel()); - } -}; - -// define transpose normal -#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ - template struct TransposeNormal - -DEFINE_CPU_TRANS_NORMAL(platform::float16); -DEFINE_CPU_TRANS_NORMAL(platform::bfloat16); -DEFINE_CPU_TRANS_NORMAL(float); -DEFINE_CPU_TRANS_NORMAL(double); -DEFINE_CPU_TRANS_NORMAL(int); -DEFINE_CPU_TRANS_NORMAL(int64_t); -DEFINE_CPU_TRANS_NORMAL(bool); -DEFINE_CPU_TRANS_NORMAL(int16_t); -DEFINE_CPU_TRANS_NORMAL(uint8_t); 
-DEFINE_CPU_TRANS_NORMAL(int8_t); -DEFINE_CPU_TRANS_NORMAL(platform::complex); -DEFINE_CPU_TRANS_NORMAL(platform::complex); - -struct TensorSetConstantCPU { - TensorSetConstantCPU(framework::Tensor* tensor, float value) - : tensor_(tensor), value_(value) {} - template - void apply() const { - auto cpu = platform::CPUPlace(); - auto* begin = tensor_->mutable_data(cpu); - std::fill(begin, begin + tensor_->numel(), static_cast(value_)); - } - framework::Tensor* tensor_; - float value_; -}; - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("NPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW( - platform::errors::Unimplemented("NPUPinnedPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("IPUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("MLUPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - PADDLE_THROW(platform::errors::Unimplemented("CustomPlace is not supported")); -} - -template <> -void set_constant_with_place( - const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - framework::VisitDataType(tensor->type(), TensorSetConstantCPU(tensor, value)); -} - -struct TensorSetConstantWithPlace : public boost::static_visitor { - TensorSetConstantWithPlace(const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) - : context_(context), tensor_(tensor), value_(value) {} - - template - void operator()(Place place) const { - set_constant_with_place(context_, tensor_, value_); - } - - const platform::DeviceContext& context_; - framework::Tensor* tensor_; - float value_; -}; - -void set_constant(const platform::DeviceContext& context, - framework::Tensor* tensor, - float value) { - TensorSetConstantWithPlace func(context, tensor, value); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - // tensor->place().apply_visitor(func); - paddle::platform::VisitPlace(tensor->place(), func); -#else - func(platform::CPUPlace()); -#endif -} - -template -struct RowwiseAdd { - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& vector, - framework::Tensor* output) { - auto in_dims = input.dims(); - auto out_dims = output->dims(); - auto size = input.numel() / in_dims[0]; - PADDLE_ENFORCE_EQ( - vector.numel(), - size, - platform::errors::InvalidArgument( - "The input vector size" - " should be equal to the size of each row of input tensor." 
- " Expected vector size=%d, but received %d", - size, - vector.numel())); - const char* in_dims_cstr = in_dims.to_str().c_str(); - const char* out_dims_cstr = out_dims.to_str().c_str(); - PADDLE_ENFORCE_EQ(out_dims, - in_dims, - platform::errors::InvalidArgument( - "The output tensor shape should be same as the input" - " tensor shape. Expected output tensor shape: %s," - " but received %s", - in_dims_cstr, - out_dims_cstr)); - - auto in = framework::EigenMatrix::From(input); - auto vec = framework::EigenVector::Flatten(vector); - auto out = framework::EigenMatrix::From(*output); - - for (int64_t i = 0; i < in_dims[0]; ++i) { - out.chip(i, 0) = in.chip(i, 0) + vec; - } - } -}; - -template struct RowwiseAdd; -template struct RowwiseAdd; - -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; - -template struct RowwiseSum; -template struct RowwiseSum; - -template struct RowwiseMean; -template struct RowwiseMean; - -template -struct ElementwiseAddTo { - void operator()(platform::CPUDeviceContext* ctx, - const framework::Tensor& src, - framework::Tensor* dst) { - auto in = framework::EigenVector::Flatten(src); - auto out = framework::EigenVector::Flatten(*dst); - auto& place = *(ctx->eigen_device()); - out.device(place) = out + in; - } -}; - -template struct ElementwiseAddTo; - -} // namespace math -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/math/maxouting.cc b/paddle/fluid/operators/math/maxouting.cc index 7729b86cc3e..2205ed51e19 100644 --- a/paddle/fluid/operators/math/maxouting.cc +++ b/paddle/fluid/operators/math/maxouting.cc @@ -109,11 +109,6 @@ void MaxOutGradFunctor::operator()( } } -template class MaxOutGradFunctor; -template class MaxOutGradFunctor; -template class MaxOutFunctor; -template class MaxOutFunctor; - template class MaxOutGradFunctor; template class MaxOutGradFunctor; template class MaxOutFunctor; diff --git a/paddle/fluid/operators/math/sample_prob.cc b/paddle/fluid/operators/math/sample_prob.cc index 16342493e45..18321cf9b9e 100644 --- a/paddle/fluid/operators/math/sample_prob.cc +++ b/paddle/fluid/operators/math/sample_prob.cc @@ -14,19 +14,8 @@ limitations under the License. */ #include "paddle/fluid/operators/math/sample_prob.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { -namespace math { - -template class SampleWithProb; -template class SampleWithProb; - -} // namespace math +namespace math {} // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 81b0e9102bb..399a1b6dc4c 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -276,51 +276,6 @@ struct SelectedRowsSumTo { template struct SelectedRowsSumTo; template struct SelectedRowsSumTo; -template -struct SelectedRowsAddToTensor { - void operator()(const platform::CPUDeviceContext& context, - const phi::SelectedRows& input1, - framework::Tensor* input2) { - if (UNLIKELY(input1.rows().size() == 0)) { - LOG(WARNING) << "input selected rows is empty!"; - return; - } - auto in1_height = input1.height(); - const auto& in2_dims = input2->dims(); - PADDLE_ENFORCE_EQ( - in1_height, - in2_dims[0], - platform::errors::InvalidArgument("The two inputs height must be equal." 
- "But received first input height = " - "[%d], second input height = [%d]", - in1_height, - in2_dims[0])); - - auto& in1_value = input1.value(); - auto& in1_rows = input1.rows(); - - int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); - PADDLE_ENFORCE_EQ( - in1_row_numel, - input2->numel() / in1_height, - platform::errors::InvalidArgument( - "The two inputs width must be equal." - "But received first input width = [%d], second input width = [%d]", - in1_row_numel, - input2->numel() / in1_height)); - - auto* in1_data = in1_value.data(); - auto* input2_data = input2->data(); - - for (size_t i = 0; i < in1_rows.size(); i++) { - for (int64_t j = 0; j < in1_row_numel; j++) { - input2_data[in1_rows[i] * in1_row_numel + j] += - in1_data[i * in1_row_numel + j]; - } - } - } -}; - template struct SelectedRowsAddToTensor { void operator()(const phi::CPUContext& context, @@ -366,13 +321,6 @@ struct SelectedRowsAddToTensor { } }; -template struct SelectedRowsAddToTensor; -template struct SelectedRowsAddToTensor; -template struct SelectedRowsAddToTensor; -template struct SelectedRowsAddToTensor; -template struct SelectedRowsAddToTensor; - template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; template struct SelectedRowsAddToTensor; @@ -582,34 +530,6 @@ struct MergeAddImpl { } }; -template -struct MergeAdd { - // unary functor, merge by adding duplicated rows in - // the input SelectedRows object. - phi::SelectedRows operator()(const platform::CPUDeviceContext& context, - const phi::SelectedRows& input, - const bool sorted_result) { - return MergeAddImpl()( - context, input, sorted_result); - } - - void operator()(const platform::CPUDeviceContext& context, - const phi::SelectedRows& input, - phi::SelectedRows* output, - const bool sorted_result) { - MergeAddImpl()( - context, input, output, sorted_result); - } - - void operator()(const platform::CPUDeviceContext& context, - const std::vector& inputs, - phi::SelectedRows* output, - const bool sorted_result) { - MergeAddImpl()( - context, inputs, output, sorted_result); - } -}; - template struct MergeAdd { // unary functor, merge by adding duplicated rows in @@ -635,10 +555,8 @@ struct MergeAdd { } }; -#define TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(dtype) \ - template struct MergeAddImpl; \ - template struct MergeAddImpl; \ - template struct MergeAdd; \ +#define TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(dtype) \ + template struct MergeAddImpl; \ template struct MergeAdd; TEMPLATE_SPECIALIZED_FOR_MERGEADD_CPU(float) diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 1a952bbb62d..826eda5559a 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -20,13 +20,6 @@ namespace phi { class DenseTensor; } // namespace phi -namespace paddle { -namespace framework {} // namespace framework -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { @@ -101,66 +94,6 @@ static void fast_mem_init(void* dest, } } -template -class PaddingLoDTensorFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::LoDTensor& seq_tensor, - framework::LoDTensor* pad_tensor, - const framework::LoDTensor& pad_value, - int pad_seq_len = -1, - int lod_level = 0, - bool norm_by_times = false, - const PadLayout layout = kBatchLengthWidth) { - auto seq_lod = seq_tensor.lod(); - const auto 
seq_offsets = framework::ToAbsOffset(seq_lod)[lod_level]; - const auto& seq_tensor_dims = seq_tensor.dims(); - const auto& pad_tensor_dims = pad_tensor->dims(); - if (pad_seq_len == -1) { - pad_seq_len = MaximumSequenceLength(seq_offsets); - } - int step_width = seq_tensor.numel() / seq_tensor_dims[0]; - - CheckDims(seq_tensor_dims, - pad_tensor_dims, - seq_offsets, - pad_seq_len, - step_width, - layout); - - PADDLE_ENFORCE_EQ( - pad_value.numel() == 1 || pad_value.numel() == step_width, - true, - platform::errors::InvalidArgument( - "The numel of 'pad_value' can only be 1 or be equal to the " - "'step_width', but got %ld != 1 and %ld. Please check the input " - "value.", - pad_value.numel(), - step_width)); - - // fill padding value - T* pad_data = pad_tensor->data(); - const T* pad_value_data = pad_value.data(); - if (pad_value.numel() == 1) { - fast_mem_init( - pad_data, pad_tensor->numel(), pad_value_data, sizeof(T)); - } else { - for (int i = 0; i < pad_tensor->numel(); i += step_width) { - memcpy(pad_data + i, pad_value_data, step_width * sizeof(T)); - } - } - - CopyValidData(pad_tensor, - &seq_tensor, - seq_offsets, - pad_seq_len, - step_width, - norm_by_times, - kSeqToPad, - layout); - } -}; - template class PaddingLoDTensorFunctor { public: @@ -221,42 +154,6 @@ class PaddingLoDTensorFunctor { } }; -template -class UnpaddingLoDTensorFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::LoDTensor& pad_tensor, - framework::LoDTensor* seq_tensor, - int pad_seq_len = -1, - int lod_level = 0, - bool norm_by_times = false, - const PadLayout layout = kBatchLengthWidth) { - auto seq_offsets = framework::ToAbsOffset(seq_tensor->lod())[lod_level]; - const auto& seq_tensor_dims = seq_tensor->dims(); - const auto& pad_tensor_dims = pad_tensor.dims(); - if (pad_seq_len == -1) { - pad_seq_len = MaximumSequenceLength(seq_offsets); - } - int step_width = seq_tensor->numel() / seq_tensor_dims[0]; - - CheckDims(seq_tensor_dims, - pad_tensor_dims, - seq_offsets, - pad_seq_len, - step_width, - layout); - - CopyValidData(seq_tensor, - &pad_tensor, - seq_offsets, - pad_seq_len, - step_width, - norm_by_times, - kPadToSeq, - layout); - } -}; - template class UnpaddingLoDTensorFunctor { public: @@ -293,16 +190,6 @@ class UnpaddingLoDTensorFunctor { } }; -template class PaddingLoDTensorFunctor; -template class PaddingLoDTensorFunctor; -template class PaddingLoDTensorFunctor; -template class PaddingLoDTensorFunctor; - -template class UnpaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; -template class UnpaddingLoDTensorFunctor; - template class PaddingLoDTensorFunctor; template class PaddingLoDTensorFunctor; template class PaddingLoDTensorFunctor; diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc index cd91b2eb531..8faf9572bef 100644 --- a/paddle/fluid/operators/math/sequence_scale.cc +++ b/paddle/fluid/operators/math/sequence_scale.cc @@ -24,29 +24,6 @@ namespace paddle { namespace operators { namespace math { -template -class ScaleLoDTensorFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const T* scales, - framework::LoDTensor* seq) { - const size_t level = 0; - auto lod = seq->lod(); - const size_t num_seq = lod[level].size() - 1; - size_t seq_width = seq->dims()[1]; - framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - - T* seq_data = seq->mutable_data(context.GetPlace()); - for (size_t i = 0; i < 
num_seq; ++i) { - for (size_t j = lod[level][i] * seq_width; - j < lod[level][i + 1] * seq_width; - ++j) { - seq_data[j] *= scales[i]; - } - } - } -}; - template class ScaleLoDTensorFunctor { public: @@ -70,9 +47,6 @@ class ScaleLoDTensorFunctor { } }; -template class ScaleLoDTensorFunctor; -template class ScaleLoDTensorFunctor; - template class ScaleLoDTensorFunctor; template class ScaleLoDTensorFunctor; diff --git a/paddle/fluid/operators/math/softmax.cc b/paddle/fluid/operators/math/softmax.cc index adea86a6c5a..730dcbf59a6 100644 --- a/paddle/fluid/operators/math/softmax.cc +++ b/paddle/fluid/operators/math/softmax.cc @@ -21,13 +21,6 @@ namespace paddle { namespace operators { namespace math { -template class SoftmaxFunctor; -template class SoftmaxFunctor; -template class SoftmaxFunctor; -template class SoftmaxFunctor; -template class SoftmaxGradFunctor; -template class SoftmaxGradFunctor; - template class SoftmaxFunctor; template class SoftmaxFunctor; template class SoftmaxFunctor; diff --git a/paddle/fluid/operators/math/vol2col.cc b/paddle/fluid/operators/math/vol2col.cc index 7b687909306..36ce3e64742 100644 --- a/paddle/fluid/operators/math/vol2col.cc +++ b/paddle/fluid/operators/math/vol2col.cc @@ -16,12 +16,6 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" -namespace paddle { -namespace platform { -class CPUDeviceContext; -} // namespace platform -} // namespace paddle - namespace paddle { namespace operators { namespace math { @@ -32,126 +26,6 @@ namespace math { * [input_channels, filter_depth, filter_height, filter_width, * output_depth, output_height, output_width] */ -template -class Vol2ColFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& vol, - const std::vector& dilations, - const std::vector& strides, - const std::vector& paddings, - framework::Tensor* col, - const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ(vol.dims().size(), - 4, - platform::errors::InvalidArgument( - "The dimension of vol should be 4, but received %d.", - vol.dims().size())); - - PADDLE_ENFORCE_EQ(col->dims().size(), - 7, - platform::errors::InvalidArgument( - "The dimension of col should be 7, but received %d.", - col->dims().size())); - - int input_channels = - (data_layout != DataLayout::kNHWC ? vol.dims()[0] : vol.dims()[3]); - int input_depth = - (data_layout != DataLayout::kNHWC ? vol.dims()[1] : vol.dims()[0]); - int input_height = - (data_layout != DataLayout::kNHWC ? vol.dims()[2] : vol.dims()[1]); - int input_width = - (data_layout != DataLayout::kNHWC ? vol.dims()[3] : vol.dims()[2]); - int filter_depth = col->dims()[1]; - int filter_height = col->dims()[2]; - int filter_width = col->dims()[3]; - int output_depth = col->dims()[4]; - int output_height = col->dims()[5]; - int output_width = col->dims()[6]; - int channels_col = - input_channels * filter_depth * filter_height * filter_width; - - // changed - bool paddings_size_is_6 = (paddings.size() == 6); - int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0]; - int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0]; - int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1]; - int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1]; - int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2]; - int pad_w_right = paddings_size_is_6 ? 
paddings[5] : paddings[2]; - - auto input_depth_tmp = (input_depth + pad_d_forth + pad_d_back - - ((dilations[0] * (filter_depth - 1) + 1))) / - strides[0] + - 1; - PADDLE_ENFORCE_EQ( - input_depth_tmp, - output_depth, - platform::errors::InvalidArgument( - "input_depth(%d) and output_depth(%d) are mismatching.", - input_depth_tmp, - output_depth)); - auto input_height_tmp = (input_height + pad_h_up + pad_h_down - - ((dilations[1] * (filter_height - 1) + 1))) / - strides[1] + - 1; - PADDLE_ENFORCE_EQ( - input_height_tmp, - output_height, - platform::errors::InvalidArgument( - "input_height(%d) and output_height(%d) are mismatching.", - input_height_tmp, - output_height)); - auto input_width_tmp = (input_width + pad_w_left + pad_w_right - - ((dilations[2] * (filter_width - 1) + 1))) / - strides[2] + - 1; - PADDLE_ENFORCE_EQ( - input_width_tmp, - output_width, - platform::errors::InvalidArgument( - "input_width(%d) and output_width(%d) are mismatching.", - input_width_tmp, - output_width)); - const T* vol_data = vol.data(); - T* col_data = col->data(); - - for (int c = 0; c < channels_col; ++c) { - int w_offset = c % filter_width; - int h_offset = (c / filter_width) % filter_height; - int d_offset = (c / filter_width / filter_height) % filter_depth; - int c_in = c / filter_width / filter_height / filter_depth; - for (int d = 0; d < output_depth; ++d) { - int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0]; - for (int h = 0; h < output_height; ++h) { - int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1]; - for (int w = 0; w < output_width; ++w) { - int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2]; - - int col_idx = - ((c * output_depth + d) * output_height + h) * output_width + w; - int vol_idx; - if (data_layout != DataLayout::kNHWC) { - vol_idx = ((c_in * input_depth + d_pad) * input_height + h_pad) * - input_width + - w_pad; - } else { - vol_idx = ((d_pad * input_height + h_pad) * input_width + w_pad) * - input_channels + - c_in; - } - col_data[col_idx] = - (h_pad < 0 || h_pad >= input_height || w_pad < 0 || - w_pad >= input_width || d_pad < 0 || d_pad >= input_depth) - ? static_cast(0) - : vol_data[vol_idx]; - } - } - } - } - } -}; - template class Vol2ColFunctor { public: @@ -278,126 +152,6 @@ class Vol2ColFunctor { * [input_channels, filter_depth, filter_height, filter_width, * output_depth, output_height, output_width] */ -template -class Col2VolFunctor { - public: - void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& col, - const std::vector& dilations, - const std::vector& strides, - const std::vector& paddings, - framework::Tensor* vol, - const DataLayout data_layout) const { - PADDLE_ENFORCE_EQ(vol->dims().size(), - 4, - platform::errors::InvalidArgument( - "The dimension of vol should be 4, but received %d.", - vol->dims().size())); - - PADDLE_ENFORCE_EQ(col.dims().size(), - 7, - platform::errors::InvalidArgument( - "The dimension of col should be 7, but received %d.", - col.dims().size())); - - int input_channels = - (data_layout != DataLayout::kNHWC ? vol->dims()[0] : vol->dims()[3]); - int input_depth = - (data_layout != DataLayout::kNHWC ? vol->dims()[1] : vol->dims()[0]); - int input_height = - (data_layout != DataLayout::kNHWC ? vol->dims()[2] : vol->dims()[1]); - int input_width = - (data_layout != DataLayout::kNHWC ? 
vol->dims()[3] : vol->dims()[2]); - int filter_depth = col.dims()[1]; - int filter_height = col.dims()[2]; - int filter_width = col.dims()[3]; - int output_depth = col.dims()[4]; - int output_height = col.dims()[5]; - int output_width = col.dims()[6]; - int channels_col = - input_channels * filter_depth * filter_height * filter_width; - - bool paddings_size_is_6 = (paddings.size() == 6); - int pad_d_forth = paddings_size_is_6 ? paddings[0] : paddings[0]; - int pad_d_back = paddings_size_is_6 ? paddings[1] : paddings[0]; - int pad_h_up = paddings_size_is_6 ? paddings[2] : paddings[1]; - int pad_h_down = paddings_size_is_6 ? paddings[3] : paddings[1]; - int pad_w_left = paddings_size_is_6 ? paddings[4] : paddings[2]; - int pad_w_right = paddings_size_is_6 ? paddings[5] : paddings[2]; - - auto input_depth_tmp = (input_depth + pad_d_forth + pad_d_back - - ((dilations[0] * (filter_depth - 1) + 1))) / - strides[0] + - 1; - PADDLE_ENFORCE_EQ( - input_depth_tmp, - output_depth, - platform::errors::InvalidArgument( - "input_depth(%d) and output_depth(%d) are mismatching.", - input_depth_tmp, - output_depth)); - auto input_height_tmp = (input_height + pad_h_up + pad_h_down - - ((dilations[1] * (filter_height - 1) + 1))) / - strides[1] + - 1; - PADDLE_ENFORCE_EQ( - input_height_tmp, - output_height, - platform::errors::InvalidArgument( - "input_height(%d) and output_height(%d) are mismatching.", - input_height_tmp, - output_height)); - auto input_width_tmp = (input_width + pad_w_left + pad_w_right - - ((dilations[2] * (filter_width - 1) + 1))) / - strides[2] + - 1; - PADDLE_ENFORCE_EQ( - input_width_tmp, - output_width, - platform::errors::InvalidArgument( - "input_width(%d) and output_width(%d) are mismatching.", - input_width_tmp, - output_width)); - T* vol_data = vol->data(); - const T* col_data = col.data(); - - for (int c = 0; c < channels_col; ++c) { - int w_offset = c % filter_width; - int h_offset = (c / filter_width) % filter_height; - int d_offset = (c / filter_width / filter_height) % filter_depth; - int cIm = c / filter_width / filter_height / filter_depth; - for (int d = 0; d < output_depth; ++d) { - int d_pad = d * strides[0] - pad_d_forth + d_offset * dilations[0]; - for (int h = 0; h < output_height; ++h) { - int h_pad = h * strides[1] - pad_h_up + h_offset * dilations[1]; - for (int w = 0; w < output_width; ++w) { - int w_pad = w * strides[2] - pad_w_left + w_offset * dilations[2]; - - if (h_pad >= 0 && h_pad < input_height && w_pad >= 0 && - w_pad < input_width && d_pad >= 0 && d_pad < input_depth) { - int vol_idx; - if (data_layout != DataLayout::kNHWC) { - vol_idx = ((cIm * input_depth + d_pad) * input_height + h_pad) * - input_width + - w_pad; - } else { - vol_idx = - ((d_pad * input_height + h_pad) * input_width + w_pad) * - input_channels + - cIm; - } - int col_idx = - ((c * output_depth + d) * output_height + h) * output_width + - w; - vol_data[vol_idx] += col_data[col_idx]; - } - } - } - } - } - } -}; - template class Col2VolFunctor { public: @@ -518,13 +272,9 @@ class Col2VolFunctor { } }; -template class Vol2ColFunctor; -template class Vol2ColFunctor; template class Vol2ColFunctor; template class Vol2ColFunctor; -template class Col2VolFunctor; -template class Col2VolFunctor; template class Col2VolFunctor; template class Col2VolFunctor; diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index a92d9ec2f2b..bd6d55fb7b3 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ 
b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -34,7 +34,6 @@ class DenseTensor; namespace paddle { namespace framework {} // namespace framework namespace platform { -class CPUDeviceContext; class MKLDNNDeviceContext; } // namespace platform } // namespace paddle diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc index 05f2fb7067e..49d6424394a 100644 --- a/paddle/fluid/operators/rank_loss_op.cc +++ b/paddle/fluid/operators/rank_loss_op.cc @@ -24,9 +24,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc index c7b0e8ced59..7fba45fa539 100644 --- a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc +++ b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc @@ -27,9 +27,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/reduce_ops/reduce_all_op.cc b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc index 6947ca5b71a..f0de9466635 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_all_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc @@ -27,9 +27,6 @@ class EmptyGradOpMaker; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle DECLARE_INFER_SHAPE_FUNCTOR(reduce_all, diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc index 85e262add2e..6634ccaaa01 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc @@ -26,9 +26,6 @@ class EmptyGradOpMaker; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle DECLARE_INFER_SHAPE_FUNCTOR(reduce_any, diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc index 1c88c4cb708..578954663c7 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc @@ -25,9 +25,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc index ca24cc9c634..d072dcfa5eb 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc @@ -27,9 +27,6 @@ class OpDesc; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; -} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index da2cf4c0dbe..074642e1b02 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -31,9 +31,6 @@ class EmptyGradOpMaker; namespace imperative { class OpBase; } // namespace imperative -namespace platform { -class CPUDeviceContext; 
-} // namespace platform } // namespace paddle namespace paddle { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 3ad22def690..ec7f46cd973 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -367,14 +367,6 @@ DeviceContextPool::DeviceContextPool( /*disable_setting_default_stream_for_allocator=*/false); } -CPUDeviceContext::CPUDeviceContext() : phi::CPUContext() { - phi::CPUContext::Init(); -} - -CPUDeviceContext::CPUDeviceContext(CPUPlace place) : phi::CPUContext(place) { - phi::CPUContext::Init(); -} - #ifdef PADDLE_WITH_IPU IPUDeviceContext::IPUDeviceContext(IPUPlace place) : place_(place) {} diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index c6cc29d9ca1..2c3bc017635 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -134,14 +134,7 @@ constexpr DeviceType kMLU = DeviceType::MLU; using DeviceContext = phi::DeviceContext; -// using CPUDeviceContext = phi::CPUContext; -// TODO(wilber): The place constructor is used in many places, it is more -// difficult to use CPUDeviceContext = phi::CPUContext directly. -class CPUDeviceContext : public phi::CPUContext { - public: - CPUDeviceContext(); - explicit CPUDeviceContext(CPUPlace place); -}; +using CPUDeviceContext = phi::CPUContext; template struct DefaultDeviceContextType; diff --git a/paddle/fluid/platform/transform.h b/paddle/fluid/platform/transform.h index 45756372e22..575415ef890 100644 --- a/paddle/fluid/platform/transform.h +++ b/paddle/fluid/platform/transform.h @@ -69,30 +69,6 @@ struct Transform { }; // NOTE: After the phi kernel is migrated, it needs to be deleted. -template <> -struct Transform { - template - void operator()(const platform::CPUDeviceContext& context, - InputIter first, - InputIter last, - OutputIter result, - UnaryOperation op) { - std::transform(first, last, result, op); - } - - template - void operator()(const platform::CPUDeviceContext& context, - InputIter1 first1, - InputIter1 last1, - InputIter2 first2, - OutputIter result, - BinaryOperation op) { - std::transform(first1, last1, first2, result, op); - } -}; template <> struct Transform { diff --git a/paddle/infrt/kernel/phi/context_kernels.cc b/paddle/infrt/kernel/phi/context_kernels.cc index 23d96aeb8d5..9c5ab13d17b 100644 --- a/paddle/infrt/kernel/phi/context_kernels.cc +++ b/paddle/infrt/kernel/phi/context_kernels.cc @@ -20,7 +20,6 @@ namespace phi { ::phi::CPUContext CreateCPUContext() { ::phi::CPUContext ctx{}; - ctx.Init(); auto allocator = new backends::CpuPhiAllocator{}; ctx.SetAllocator(allocator); ctx.SetHostAllocator(allocator); diff --git a/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc b/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc index 5a314817c24..aa577da60c3 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc +++ b/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc @@ -81,7 +81,6 @@ TEST(ElementwiseAdd, launcher_registry) { ::phi::CPUContext context; context.SetAllocator(alloc); - context.Init(); host_context::KernelFrameBuilder kernel_frame_builder; kernel_frame_builder.AddArgument(new host_context::Value(std::move(context))); diff --git a/paddle/phi/backends/cpu/cpu_context.cc b/paddle/phi/backends/cpu/cpu_context.cc index 42e19944b21..63b5d82f3bd 100644 --- a/paddle/phi/backends/cpu/cpu_context.cc +++ b/paddle/phi/backends/cpu/cpu_context.cc @@ -51,10 +51,14 @@ struct 
CPUContext::Impl { }; CPUContext::CPUContext() - : DeviceContext(), impl_(std::make_unique()) {} + : DeviceContext(), impl_(std::make_unique()) { + impl_->Init(); +} CPUContext::CPUContext(const Place& place) - : DeviceContext(), impl_(std::make_unique(place)) {} + : DeviceContext(), impl_(std::make_unique(place)) { + impl_->Init(); +} CPUContext::~CPUContext() = default; @@ -62,8 +66,6 @@ CPUContext::CPUContext(CPUContext&&) = default; CPUContext& CPUContext::operator=(CPUContext&&) = default; -void CPUContext::Init() { impl_->Init(); } - Eigen::DefaultDevice* CPUContext::eigen_device() const { return impl_->GetEigenDevice(); } diff --git a/paddle/phi/backends/cpu/cpu_context.h b/paddle/phi/backends/cpu/cpu_context.h index e482fdc9e04..58548b2e04e 100644 --- a/paddle/phi/backends/cpu/cpu_context.h +++ b/paddle/phi/backends/cpu/cpu_context.h @@ -34,12 +34,6 @@ class PADDLE_API CPUContext : public DeviceContext { Eigen::DefaultDevice* eigen_device() const; const Place& GetPlace() const override; - public: - // NOTE: DeviceContext hold resources. Used in training scenarios. - // The interface used by the training scene, DeviceContext will initialize - // all resources and delete them when destructing. - void Init(); - protected: // NOTE: External users manage resources. Used in inference scenarios. // The Set interface is for inference only, DeviceContext will mark the diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.h b/paddle/phi/kernels/funcs/blas/blas_impl.h index db4796b3f61..a18ec953d0a 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.h @@ -1003,12 +1003,6 @@ struct CBlas { #ifdef PADDLE_WITH_MKLML template <> template -T *Blas::GEMM_ALLOC( - const CBLAS_IDENTIFIER id, const int M, const int N, const int K) const { - return CBlas::GEMM_ALLOC(id, M, N, K); -} -template <> -template T *Blas::GEMM_ALLOC(const CBLAS_IDENTIFIER id, const int M, const int N, @@ -1016,20 +1010,6 @@ T *Blas::GEMM_ALLOC(const CBLAS_IDENTIFIER id, return CBlas::GEMM_ALLOC(id, M, N, K); } -template <> -template -void Blas::GEMM_PACK( - const CBLAS_IDENTIFIER id, - const CBLAS_TRANSPOSE trans, - int M, - int N, - int K, - const T alpha, - const T *src, - const int ld, - T *dst) const { - CBlas::GEMM_PACK(CblasRowMajor, id, trans, M, N, K, alpha, src, ld, dst); -} template <> template void Blas::GEMM_PACK(const CBLAS_IDENTIFIER id, @@ -1044,24 +1024,6 @@ void Blas::GEMM_PACK(const CBLAS_IDENTIFIER id, CBlas::GEMM_PACK(CblasRowMajor, id, trans, M, N, K, alpha, src, ld, dst); } -template <> -template -void Blas::GEMM_COMPUTE( - int transA, - int transB, - int M, - int N, - int K, - const T *A, - const int lda, - const T *B, - const int ldb, - T beta, - T *C, - const int ldc) const { - CBlas::GEMM_COMPUTE( - CblasRowMajor, transA, transB, M, N, K, A, lda, B, ldb, beta, C, ldc); -} template <> template void Blas::GEMM_COMPUTE(int transA, @@ -1080,11 +1042,6 @@ void Blas::GEMM_COMPUTE(int transA, CblasRowMajor, transA, transB, M, N, K, A, lda, B, ldb, beta, C, ldc); } -template <> -template -void Blas::GEMM_FREE(T *data) const { - CBlas::GEMM_FREE(data); -} template <> template void Blas::GEMM_FREE(T *data) const { @@ -1092,36 +1049,6 @@ void Blas::GEMM_FREE(T *data) const { } #endif -template <> -template -void Blas::GEMM(CBLAS_TRANSPOSE transA, - CBLAS_TRANSPOSE transB, - int M, - int N, - int K, - T alpha, - const T *A, - const T *B, - T beta, - T *C) const { - int lda = (transA == CblasNoTrans) ? K : M; - int ldb = (transB == CblasNoTrans) ? 
N : K; - int ldc = N; - CBlas::GEMM(CblasRowMajor, - transA, - transB, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -} template <> template void Blas::GEMM(CBLAS_TRANSPOSE transA, @@ -1153,36 +1080,6 @@ void Blas::GEMM(CBLAS_TRANSPOSE transA, ldc); } -template <> -template -void Blas::GEMM(bool transA, - bool transB, - int M, - int N, - int K, - T alpha, - const T *A, - int lda, - const T *B, - int ldb, - T beta, - T *C, - int ldc) const { - CBlas::GEMM(CblasRowMajor, - transA == false ? CblasNoTrans : CblasTrans, - transB == false ? CblasNoTrans : CblasTrans, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -} template <> template void Blas::GEMM(bool transA, @@ -1214,36 +1111,6 @@ void Blas::GEMM(bool transA, ldc); } -template <> -template -void Blas::GEMM(CBLAS_TRANSPOSE transA, - CBLAS_TRANSPOSE transB, - int M, - int N, - int K, - T alpha, - const T *A, - int lda, - const T *B, - int ldb, - T beta, - T *C, - int ldc) const { - CBlas::GEMM(CblasRowMajor, - transA, - transB, - M, - N, - K, - alpha, - A, - lda, - B, - ldb, - beta, - C, - ldc); -} template <> template void Blas::GEMM(CBLAS_TRANSPOSE transA, @@ -1323,50 +1190,18 @@ void Blas::MatMul(const phi::DenseTensor &mat_a, mat_out->data()); } -template <> -template -void Blas::AXPY(int n, - T alpha, - const T *x, - T *y) const { - CBlas::AXPY(n, alpha, x, 1, y, 1); -} template <> template void Blas::AXPY(int n, T alpha, const T *x, T *y) const { CBlas::AXPY(n, alpha, x, 1, y, 1); } -template <> -template -void Blas::VCOPY(int n, - const T *x, - T *y) const { - CBlas::VCOPY(n, x, 1, y, 1); -} template <> template void Blas::VCOPY(int n, const T *x, T *y) const { CBlas::VCOPY(n, x, 1, y, 1); } -template <> -template -void Blas::VADD(int n, - const T *x, - const T *y, - T *z) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VADD(n, x, y, z); -#else - if (x == z) { - this->template AXPY(n, (T)(1.), y, z); - } else { - this->template VCOPY(n, y, z); - this->template AXPY(n, (T)(1.), x, z); - } -#endif -} template <> template void Blas::VADD(int n, const T *x, const T *y, T *z) const { @@ -1382,21 +1217,6 @@ void Blas::VADD(int n, const T *x, const T *y, T *z) const { #endif } -template <> -template -void Blas::VSUB(int n, - const T *x, - const T *y, - T *z) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VSUB(n, x, y, z); -#else - // try to find if openblas support vsub - for (int i = 0; i < n; ++i) { - z[i] = x[i] - y[i]; - } -#endif -} template <> template void Blas::VSUB(int n, const T *x, const T *y, T *z) const { @@ -1410,21 +1230,6 @@ void Blas::VSUB(int n, const T *x, const T *y, T *z) const { #endif } -template <> -template -void Blas::VMUL(int n, - const T *x, - const T *y, - T *z) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VMUL(n, x, y, z); -#else - // try to find if openblas support vmul - for (int i = 0; i < n; ++i) { - z[i] = x[i] * y[i]; - } -#endif -} template <> template void Blas::VMUL(int n, const T *x, const T *y, T *z) const { @@ -1438,21 +1243,6 @@ void Blas::VMUL(int n, const T *x, const T *y, T *z) const { #endif } -template <> -template -void Blas::VDIV(int n, - const T *x, - const T *y, - T *z) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VDIV(n, x, y, z); -#else - // try to find if openblas support vdiv - for (int i = 0; i < n; ++i) { - z[i] = x[i] / y[i]; - } -#endif -} template <> template void Blas::VDIV(int n, const T *x, const T *y, T *z) const { @@ -1466,20 +1256,6 @@ void Blas::VDIV(int n, const T *x, const T *y, T *z) const { #endif } -template <> -template -void 
Blas::VEXP(int n, - const T *x, - T *y) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VEXP(n, x, y); -#else - // try to find if openblas support vexp - for (int i = 0; i < n; ++i) { - y[i] = std::exp(x[i]); - } -#endif -} template <> template void Blas::VEXP(int n, const T *x, T *y) const { @@ -1493,19 +1269,6 @@ void Blas::VEXP(int n, const T *x, T *y) const { #endif } -template <> -template -void Blas::VSQUARE(int n, - const T *x, - T *y) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VSQUARE(n, x, y); -#else - for (int i = 0; i < n; ++i) { - y[i] = x[i] * x[i]; - } -#endif -} template <> template void Blas::VSQUARE(int n, const T *x, T *y) const { @@ -1518,20 +1281,6 @@ void Blas::VSQUARE(int n, const T *x, T *y) const { #endif } -template <> -template -void Blas::VPOW(int n, - const T *x, - T a, - T *y) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VPOW(n, x, a, y); -#else - for (int i = 0; i < n; ++i) { - y[i] = std::pow(x[i], a); - } -#endif -} template <> template void Blas::VPOW(int n, const T *x, T a, T *y) const { @@ -1544,22 +1293,6 @@ void Blas::VPOW(int n, const T *x, T a, T *y) const { #endif } -template <> -template -T Blas::DOT(int n, - const T *x, - const T *y) const { -#ifdef PADDLE_WITH_MKLML - return CBlas::DOT(n, x, 1, y, 1); -#else - // try to find if openblas support cblas_dot - T sum = 0; - for (int i = 0; i < n; ++i) { - sum += x[i] * y[i]; - } - return sum; -#endif -} template <> template T Blas::DOT(int n, const T *x, const T *y) const { @@ -1575,20 +1308,6 @@ T Blas::DOT(int n, const T *x, const T *y) const { #endif } -template <> -template -void Blas::SCAL(int n, - const T a, - T *x) const { -#ifdef PADDLE_WITH_MKLML - CBlas::SCAL(n, a, x, 1); -#else - // try to find if openblas support cblas_scal - for (int i = 0; i < n; ++i) { - x[i] = a * x[i]; - } -#endif -} template <> template void Blas::SCAL(int n, const T a, T *x) const { @@ -1602,20 +1321,6 @@ void Blas::SCAL(int n, const T a, T *x) const { #endif } -template <> -template -T Blas::ASUM(int n, T *x, int inc) const { - auto sum = static_cast(0.0); -#ifdef PADDLE_WITH_MKLML - sum = CBlas::ASUM(n, x, inc); -#else - // TODO(jczaja): check if openblas does provide cblas_sasum/cblas_dasum - for (int c = 0; c < n; ++c) { - sum += x[c]; - } -#endif - return sum; -} template <> template T Blas::ASUM(int n, T *x, int inc) const { @@ -1631,19 +1336,6 @@ T Blas::ASUM(int n, T *x, int inc) const { return sum; } -template <> -template -void Blas::GEMV(bool trans_a, - int M, - int N, - T alpha, - const T *A, - const T *B, - T beta, - T *C) const { - CBLAS_TRANSPOSE transA = !trans_a ? CblasNoTrans : CblasTrans; - CBlas::GEMV(CblasRowMajor, transA, M, N, alpha, A, N, B, 1, beta, C, 1); -} template <> template void Blas::GEMV(bool trans_a, @@ -1658,66 +1350,6 @@ void Blas::GEMV(bool trans_a, CBlas::GEMV(CblasRowMajor, transA, M, N, alpha, A, N, B, 1, beta, C, 1); } -template <> -template -void Blas::BatchedGEMM( - CBLAS_TRANSPOSE transA, - CBLAS_TRANSPOSE transB, - int M, - int N, - int K, - T alpha, - const T *A, - const T *B, - T beta, - T *C, - int batchCount, - int64_t strideA, - int64_t strideB) const { - PADDLE_ENFORCE_NOT_NULL( - A, phi::errors::InvalidArgument("Pointer A should not be null.")); - PADDLE_ENFORCE_NOT_NULL( - B, phi::errors::InvalidArgument("Pointer B should not be null.")); - PADDLE_ENFORCE_NOT_NULL( - C, phi::errors::InvalidArgument("Pointer C should not be null.")); -#ifdef PADDLE_WITH_MKLML - int lda = (transA == CblasNoTrans) ? K : M; - int ldb = (transB == CblasNoTrans) ? 
N : K; - int ldc = N; - auto a_array = std::vector(batchCount); - auto b_array = std::vector(batchCount); - auto c_array = std::vector(batchCount); - for (int k = 0; k < batchCount; ++k) { - a_array[k] = &A[k * strideA]; - b_array[k] = &B[k * strideB]; - c_array[k] = &C[k * M * N]; - } - - CBlas::GEMM_BATCH(CblasRowMajor, - &transA, - &transB, - &M, - &N, - &K, - &alpha, - a_array.data(), - &lda, - b_array.data(), - &ldb, - &beta, - c_array.data(), - &ldc, - 1 /* group_count */, - &batchCount); -#else - for (int k = 0; k < batchCount; ++k) { - auto *Ak = &A[k * strideA]; - auto *Bk = &B[k * strideB]; - auto *Ck = &C[k * M * N]; - this->template GEMM(transA, transB, M, N, K, alpha, Ak, Bk, beta, Ck); - } -#endif -} template <> template void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, @@ -1778,47 +1410,6 @@ void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, #endif } -template <> -template -void Blas::BatchedGEMM( - CBLAS_TRANSPOSE transA, - CBLAS_TRANSPOSE transB, - int M, - int N, - int K, - T alpha, - const T **A, - const T **B, - T beta, - T **C, - int batchCount) const { -#ifdef PADDLE_WITH_MKLML - const int lda = (std::max)((transA == CblasNoTrans) ? K : M, 1); - const int ldb = (std::max)((transB == CblasNoTrans) ? N : K, 1); - const int ldc = (std::max)(N, 1); - CBlas::GEMM_BATCH(CblasRowMajor, - &transA, - &transB, - &M, - &N, - &K, - &alpha, - A, - &lda, - B, - &ldb, - &beta, - C, - &ldc, - 1 /* group_count */, - &batchCount); -#else - for (int k = 0; k < batchCount; ++k) { - this->template GEMM( - transA, transB, M, N, K, alpha, A[k], B[k], beta, C[k]); - } -#endif -} template <> template void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, @@ -1864,113 +1455,6 @@ void Blas::BatchedGEMM(CBLAS_TRANSPOSE transA, !defined(PADDLE_WITH_HIP) // @{ Group Blas MKLML: BatchedGEMMWithHead template <> template -void Blas::BatchedGEMMWithHead( - CBLAS_TRANSPOSE transA, - CBLAS_TRANSPOSE transB, - int W1, - int H1, - int W2, - int H2, - T alpha, - const T *A, - const T *B, - T beta, - T *C, - int batchCount, - int64_t strideA, - int64_t strideB, - int64_t head_number, - bool split_b_vertical) const { - int lda = (transA == CblasNoTrans) ? W1 : H1; - int ldb = (transB == CblasNoTrans) ? W2 : H2; - auto a_array = std::vector(batchCount); - auto b_array = std::vector(batchCount); - auto c_array = std::vector(batchCount); - - if (split_b_vertical) { - int ldc = W2; - int sub_width = W2 / head_number; - - for (int i = 0; i < head_number; i++) { - int sub_matA_offset = (transA == CblasNoTrans) - ? i * (W1 / head_number) - : i * (W1 / head_number) * H1; - int sub_matB_offset = (transB == CblasNoTrans) - ? i * (W2 / head_number) - : i * (W2 / head_number) * H2; - int sub_matC_offset = i * W2 / head_number; - for (int k = 0; k < batchCount; ++k) { - a_array[k] = &A[k * strideA] + sub_matA_offset; - b_array[k] = &B[k * strideB] + sub_matB_offset; - c_array[k] = &C[k * H1 * W2] + sub_matC_offset; - } - - CBlas::GEMM_BATCH(CblasRowMajor, - &transA, - &transB, - &H1, - &sub_width, - &H2, - &alpha, - a_array.data(), - &lda, - b_array.data(), - &ldb, - &beta, - c_array.data(), - &ldc, - 1 /* group_count */, - &batchCount); - } - - } else { - PADDLE_ENFORCE_EQ( - W1, - H2, - phi::errors::InvalidArgument( - "The fisrt matrix width should be same as second matrix height," - "but received fisrt matrix width %d" - ", second matrix height %d", - W1, - H2)); - int ldc = W2 * head_number; - int sub_width = W1 / head_number; - - for (int i = 0; i < head_number; i++) { - int sub_matA_offset = (transA == CblasNoTrans) - ? 
i * (W1 / head_number) - : i * (W1 / head_number) * H1; - int sub_matB_offset = (transB == CblasNoTrans) - ? i * (W1 / head_number) * W2 - : i * (W1 / head_number); - int sub_matC_offset = i * W2; - for (int k = 0; k < batchCount; ++k) { - a_array[k] = &A[k * strideA] + sub_matA_offset; - b_array[k] = &B[k * strideB] + sub_matB_offset; - c_array[k] = &C[k * H1 * head_number * W2] + sub_matC_offset; - } - - CBlas::GEMM_BATCH(CblasRowMajor, - &transA, - &transB, - &H1, - &W2, - &sub_width, - &alpha, - a_array.data(), - &lda, - b_array.data(), - &ldb, - &beta, - c_array.data(), - &ldc, - 1 /* group_count */, - &batchCount); - } - } -} -template <> -template void Blas::BatchedGEMMWithHead(CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int W1, @@ -2097,43 +1581,6 @@ void Blas::MatMul( N); } -template <> -template -void Blas::MatMul( - const int M, const int N, const int K, const T *A, const T *B, T *C) const { -#ifdef PADDLE_WITH_LIBXSMM - // Refer to https://github.com/hfp/libxsmm/blob/master/README.md - // But the threshold is custom constexpr int LIBXSMM_THRESHOLD = 20 * 20 * 20; - - // Since the matrix is very small, - // so the unit of calculation is already very fast, - // and the if( M*N*K < LIBXSMM_THRESHOLD) would be overhead, - // use xsmm directly. - // Note: SMM use ColMajor - const char transa = 'N'; - const char transb = 'N'; - const T alpha = static_cast(1); - const T beta = static_cast(0); - CBlas::SMM_GEMM( - &transa, &transb, &N, &M, &K, &alpha, B, &N, A, &K, &beta, C, &N); - return; -#endif - - CBlas::GEMM(CblasRowMajor, - CblasNoTrans, - CblasNoTrans, - M, - N, - K, - static_cast(1), - A, - K, - B, - N, - static_cast(0), - C, - N); -} template <> template void Blas::MatMul( @@ -2425,20 +1872,6 @@ void Blas::VINV(int n, const T *a, T *y) const { #endif } -template <> -template -void Blas::VMERF(int n, - const T *a, - T *y, - int64_t mode) const { -#ifdef PADDLE_WITH_MKLML - CBlas::VMERF(n, a, y, mode); -#else - for (int i = 0; i < n; ++i) { - y[i] = std::erf(a[i]); - } -#endif -} template <> template void Blas::VMERF(int n, const T *a, T *y, int64_t mode) const { @@ -2454,39 +1887,6 @@ void Blas::VMERF(int n, const T *a, T *y, int64_t mode) const { #ifdef PADDLE_WITH_MKLML template <> template -void Blas::CSRMM(const char *transa, - const int *m, - const int *n, - const int *k, - const T *alpha, - const char *matdescra, - const T *val, - const int *indx, - const int *pntrb, - const int *pntre, - const T *b, - const int *ldb, - const T *beta, - T *c, - const int *ldc) const { - CBlas::CSRMM(transa, - m, - n, - k, - alpha, - matdescra, - val, - indx, - pntrb, - pntre, - b, - ldb, - beta, - c, - ldc); -} -template <> -template void Blas::CSRMM(const char *transa, const int *m, const int *n, @@ -2520,22 +1920,6 @@ void Blas::CSRMM(const char *transa, } #endif -template <> -template -void Blas::TRSM(CBLAS_SIDE side, - CBLAS_UPLO uplo, - CBLAS_TRANSPOSE transA, - CBLAS_DIAG diag, - int M, - int N, - T alpha, - const T *A, - int lda, - T *B, - int ldb) const { - CBlas::TRSM( - CblasRowMajor, side, uplo, transA, diag, M, N, alpha, A, lda, B, ldb); -} template <> template void Blas::TRSM(CBLAS_SIDE side, diff --git a/paddle/phi/kernels/funcs/fc_functor.cc b/paddle/phi/kernels/funcs/fc_functor.cc index 0fb38c971ab..0434483be13 100644 --- a/paddle/phi/kernels/funcs/fc_functor.cc +++ b/paddle/phi/kernels/funcs/fc_functor.cc @@ -96,8 +96,6 @@ void FCFunctor::operator()(const DeviceContext& context, } } -template class FCFunctor; -template class FCFunctor; template class FCFunctor; 
template class FCFunctor; diff --git a/paddle/phi/kernels/funcs/for_range.h b/paddle/phi/kernels/funcs/for_range.h index bf0888c301f..78066ce5b2f 100644 --- a/paddle/phi/kernels/funcs/for_range.h +++ b/paddle/phi/kernels/funcs/for_range.h @@ -41,22 +41,6 @@ struct ForRange { size_t limit_; }; -// NOTE: After the pten kernel is migrated, it needs to be deleted. -template <> -struct ForRange { - ForRange(const paddle::platform::CPUDeviceContext& dev_ctx, size_t limit) - : dev_ctx_(dev_ctx), limit_(limit) {} - - template - void operator()(Function func) const { - phi::funcs::ForRange for_range(dev_ctx_, limit_); - for_range(func); - } - - const paddle::platform::CPUDeviceContext& dev_ctx_; - size_t limit_; -}; - #if defined(__NVCC__) || defined(__HIPCC__) template diff --git a/paddle/phi/kernels/funcs/gru_compute.cc b/paddle/phi/kernels/funcs/gru_compute.cc index 8cda2e9062a..c081a9ed97d 100644 --- a/paddle/phi/kernels/funcs/gru_compute.cc +++ b/paddle/phi/kernels/funcs/gru_compute.cc @@ -179,60 +179,6 @@ struct GRUUnitGradFunctor { } }; -template -struct GRUUnitFunctorV2 { - static void compute(const paddle::platform::CPUDeviceContext &context, - GRUMetaValue value, - int frame_size, - int batch_size, - const phi::funcs::detail::ActivationType active_node, - const phi::funcs::detail::ActivationType active_gate) { -#if !defined(__NVCC__) && !defined(__HIPCC___) - auto blas = - phi::funcs::GetBlas(context); - if (value.prev_out_value) { - blas.GEMM(CblasNoTrans, - CblasTrans, - batch_size, - frame_size, - frame_size, - 1, - value.prev_out_value, - value.state_weight, - 0, - value.reset_output_value); - } - detail::forward_reset_output( - phi::funcs::detail::forward::gru_resetOutput(), - value, - frame_size, - batch_size, - active_gate, - false, - &context); - - T *cell_state_value = value.gate_value + 2 * frame_size; - T *reset_output_value = value.reset_output_value; - for (int b = 0; b < batch_size; ++b) { - blas.VADD( - frame_size, cell_state_value, reset_output_value, cell_state_value); - cell_state_value += frame_size * 3; - reset_output_value += frame_size; - } - - detail::forward_final_output( - phi::funcs::detail::forward::gru_finalOutput(), - value, - frame_size, - batch_size, - active_node, - true, - false, - &context); -#endif - } -}; - template struct GRUUnitFunctorV2 { static void compute(const CPUContext &context, @@ -286,131 +232,6 @@ struct GRUUnitFunctorV2 { } }; -template -struct GRUUnitGradFunctorV2 { - static void compute(const paddle::platform::CPUDeviceContext &context, - GRUMetaValue value, - GRUMetaGrad grad, - int frame_size, - int batch_size, - const phi::funcs::detail::ActivationType active_node, - const phi::funcs::detail::ActivationType active_gate) { -#if !defined(__NVCC__) && !defined(__HIPCC___) - // calculate grad_update_gate, grad_frame_state, - // grad_reset_output, grad_reset_gate - detail::cpu_gru_backward(context, - phi::funcs::detail::backward::gru(), - value, - grad, - frame_size, - batch_size, - active_node, - active_gate); - auto blas = - phi::funcs::GetBlas(context); - if (grad.prev_out_grad && value.prev_out_value) { - // update prev_out_grad - blas.GEMM(false, - false, - batch_size, - frame_size, - frame_size, - 1, - grad.gate_grad, - frame_size * 3, - value.gate_weight, - frame_size, - 1, - grad.prev_out_grad, - frame_size); - blas.GEMM(false, - false, - batch_size, - frame_size, - frame_size, - 1, - grad.gate_grad + frame_size, - frame_size * 3, - value.gate_weight + frame_size * frame_size, - frame_size, - 1, - grad.prev_out_grad, - frame_size); 
- blas.GEMM(false, - false, - batch_size, - frame_size, - frame_size, - 1, - grad.reset_output_grad, - frame_size, - value.state_weight, - frame_size, - 1, - grad.prev_out_grad, - frame_size); - // update weight_hh_grad - if (grad.gate_weight_grad) { - // reset gate - blas.GEMM(true, - false, - frame_size, - frame_size, - batch_size, - 1, - grad.gate_grad, - frame_size * 3, - value.prev_out_value, - frame_size, - 1, - grad.gate_weight_grad, - frame_size); - // update gate - blas.GEMM(true, - false, - frame_size, - frame_size, - batch_size, - 1, - grad.gate_grad + frame_size, - frame_size * 3, - value.prev_out_value, - frame_size, - 1, - grad.gate_weight_grad + frame_size * frame_size, - frame_size); - // cell state - blas.GEMM(true, - false, - frame_size, - frame_size, - batch_size, - 1, - grad.reset_output_grad, - frame_size, - value.prev_out_value, - frame_size, - 1, - grad.state_weight_grad, - frame_size); - } - } - // update bias_hh_grad - T *gate_grad = grad.gate_grad; - T *bias_hh_grad = grad.bias_hh_grad; - T *state_bias_grad = grad.bias_hh_grad + 2 * frame_size; - T *reset_output_grad = grad.reset_output_grad; - for (int b = 0; b < batch_size; ++b) { - blas.VADD(2 * frame_size, bias_hh_grad, gate_grad, bias_hh_grad); - blas.VADD( - frame_size, state_bias_grad, reset_output_grad, state_bias_grad); - gate_grad += 3 * frame_size; - reset_output_grad += frame_size; - } -#endif - } -}; - template struct GRUUnitGradFunctorV2 { static void compute(const CPUContext &context, @@ -540,12 +361,6 @@ template struct GRUUnitFunctor; template struct GRUUnitGradFunctor; template struct GRUUnitGradFunctor; -template struct GRUUnitFunctorV2; -template struct GRUUnitFunctorV2; -template struct GRUUnitGradFunctorV2; -template struct GRUUnitGradFunctorV2; - template struct GRUUnitFunctorV2; template struct GRUUnitFunctorV2; template struct GRUUnitGradFunctorV2; diff --git a/paddle/phi/kernels/funcs/lstm_compute.cc b/paddle/phi/kernels/funcs/lstm_compute.cc index 45d0b2e40b4..e4b8a6961fd 100644 --- a/paddle/phi/kernels/funcs/lstm_compute.cc +++ b/paddle/phi/kernels/funcs/lstm_compute.cc @@ -21,38 +21,6 @@ limitations under the License. 
*/ namespace phi { namespace funcs { -template -struct LstmUnitFunctor { - static void compute(const paddle::platform::CPUDeviceContext& context, - LstmMetaValue value, - int frame_size, - int batch_size, - T cell_clip, - const phi::funcs::detail::ActivationType& gate_act, - const phi::funcs::detail::ActivationType& cell_act, - const phi::funcs::detail::ActivationType& cand_act, - bool old_api_version = true) { - for (int b = 0; b < batch_size; b++) { - detail::cpu_lstm_forward(context, - phi::funcs::detail::forward::lstm(), - value, - frame_size, - cell_clip, - cand_act, - gate_act, - cell_act, - old_api_version); - value.gate_value += frame_size * 4; - value.state_value += frame_size; - value.state_active_value += frame_size; - value.output_value += frame_size; - if (value.prev_state_value) { - value.prev_state_value += frame_size; - } - } - } -}; - template struct LstmUnitFunctor { static void compute(const CPUContext& context, @@ -85,49 +53,6 @@ struct LstmUnitFunctor { } }; -template -struct LstmUnitGradFunctor { - static void compute(const paddle::platform::CPUDeviceContext& context, - LstmMetaValue value, - LstmMetaGrad grad, - int frame_size, - int batch_size, - T cell_clip, - const phi::funcs::detail::ActivationType& gate_act, - const phi::funcs::detail::ActivationType& cell_act, - const phi::funcs::detail::ActivationType& cand_act, - bool old_api_version = true) { - for (int b = 0; b < batch_size; b++) { - detail::cpu_lstm_backward(context, - phi::funcs::detail::backward::lstm(), - value, - grad, - frame_size, - cell_clip, - cand_act, - gate_act, - cell_act, - old_api_version); - - value.gate_value += frame_size * 4; - value.state_value += frame_size; - value.state_active_value += frame_size; - value.output_value += frame_size; - if (value.prev_state_value) { - value.prev_state_value += frame_size; - } - - grad.gate_grad += frame_size * 4; - grad.state_grad += frame_size; - grad.state_active_grad += frame_size; - grad.output_grad += frame_size; - if (grad.prev_state_grad) { - grad.prev_state_grad += frame_size; - } - } - } -}; - template struct LstmUnitGradFunctor { static void compute(const CPUContext& context, @@ -171,11 +96,6 @@ struct LstmUnitGradFunctor { } }; -template class LstmUnitFunctor; -template class LstmUnitFunctor; -template class LstmUnitGradFunctor; -template class LstmUnitGradFunctor; - template class LstmUnitFunctor; template class LstmUnitFunctor; template class LstmUnitGradFunctor; diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc index 033c50e537d..042b333ad45 100644 --- a/paddle/phi/kernels/funcs/math_function.cc +++ b/paddle/phi/kernels/funcs/math_function.cc @@ -39,22 +39,6 @@ namespace funcs { using float16 = phi::dtype::float16; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant; -template struct SetConstant>; -template struct SetConstant>; - template struct SetConstant; template struct SetConstant; template struct SetConstant; @@ -85,46 +69,20 @@ template struct SetConstant>; #endif -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ 
- template struct Transpose; \ - template struct Transpose, \ - RANK>; \ - template struct Transpose, \ - RANK>; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose, \ - RANK>; \ +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose, \ + RANK>; \ template struct Transpose, RANK>; DEFINE_CPU_TRANS(1); @@ -163,8 +121,7 @@ void TransposeNormal::operator()( } // define transpose normal -#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ - template struct TransposeNormal; \ +#define DEFINE_CPU_TRANS_NORMAL(TYPE) \ template struct TransposeNormal DEFINE_CPU_TRANS_NORMAL(phi::dtype::float16); @@ -291,6 +248,31 @@ void set_constant(const paddle::platform::DeviceContext& context, #endif } +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; +template struct ColwiseSum; + +template struct RowwiseMean; +template struct RowwiseMean; + +template +struct ElementwiseAddTo { + void operator()(paddle::platform::CPUDeviceContext* ctx, + const paddle::framework::Tensor& src, + paddle::framework::Tensor* dst) { + auto in = paddle::framework::EigenVector::Flatten(src); + auto out = paddle::framework::EigenVector::Flatten(*dst); + auto& place = *(ctx->eigen_device()); + out.device(place) = out + in; + } +}; + +template struct ElementwiseAddTo; +template struct ElementwiseAddTo; + template struct RowwiseAdd { void operator()(const paddle::platform::CPUDeviceContext& context, @@ -333,41 +315,5 @@ struct RowwiseAdd { template struct RowwiseAdd; template struct RowwiseAdd; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; - -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; -template struct ColwiseSum; - -template struct RowwiseSum; -template struct RowwiseSum; - -template struct RowwiseMean; -template struct RowwiseMean; - -template struct RowwiseMean; -template struct RowwiseMean; - -template -struct ElementwiseAddTo { - void operator()(paddle::platform::CPUDeviceContext* ctx, - const paddle::framework::Tensor& src, - paddle::framework::Tensor* dst) { - auto in = paddle::framework::EigenVector::Flatten(src); - auto out = paddle::framework::EigenVector::Flatten(*dst); - auto& place = *(ctx->eigen_device()); - out.device(place) = out + in; - } -}; - -template struct ElementwiseAddTo; -template struct ElementwiseAddTo; - } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/funcs/matrix_inverse.cc b/paddle/phi/kernels/funcs/matrix_inverse.cc index c95e97f8ea8..c316970e6a5 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.cc +++ b/paddle/phi/kernels/funcs/matrix_inverse.cc @@ -29,9 +29,5 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, template class MatrixInverseFunctor; template class MatrixInverseFunctor; -// TODO(chenweihang): remove these instantiations later -template class MatrixInverseFunctor; -template class MatrixInverseFunctor; - } // namespace funcs } 
// namespace phi diff --git a/paddle/phi/tests/api/test_sparse_utils_api.cc b/paddle/phi/tests/api/test_sparse_utils_api.cc index e0201755511..d5891baaf10 100644 --- a/paddle/phi/tests/api/test_sparse_utils_api.cc +++ b/paddle/phi/tests/api/test_sparse_utils_api.cc @@ -48,7 +48,6 @@ TEST(API, to_sparse_coo) { std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data); phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); // 1. test dense_to_sparse_coo paddle::experimental::Tensor x(dense_x); diff --git a/paddle/phi/tests/common/test_scalar.cu b/paddle/phi/tests/common/test_scalar.cu index 50b9e198da0..95334ac36a6 100644 --- a/paddle/phi/tests/common/test_scalar.cu +++ b/paddle/phi/tests/common/test_scalar.cu @@ -47,7 +47,6 @@ TEST(Scalar, ConstructFromDenseTensor1) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = 1; @@ -67,7 +66,6 @@ TEST(Scalar, ConstructFromDenseTensor2) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = 1; @@ -87,7 +85,6 @@ TEST(Scalar, ConstructFromDenseTensor3) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = 1; @@ -107,7 +104,6 @@ TEST(Scalar, ConstructFromDenseTensor4) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = true; @@ -127,7 +123,6 @@ TEST(Scalar, ConstructFromDenseTensor5) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = 1; @@ -148,7 +143,6 @@ TEST(Scalar, ConstructFromDenseTensor6) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); dense_x_data[0] = 1; @@ -170,7 +164,6 @@ TEST(Scalar, ConstructFromDenseTensor7) { .GetAllocator(phi::GPUPlace()) .get()); dev_ctx.Init(); - auto* dense_x_data = dev_ctx.Alloc(&dense_x); FillTensor<<<1, 1, 0, dev_ctx.stream()>>>(dense_x_data); dev_ctx.Wait(); diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt index c299559da59..3d549aa5f16 100644 --- a/paddle/phi/tests/core/CMakeLists.txt +++ b/paddle/phi/tests/core/CMakeLists.txt @@ -24,10 +24,6 @@ cc_test( test_op_utils SRCS test_op_utils.cc DEPS op_compat_infos) -cc_test( - test_phi_device_context - SRCS test_device_context.cc - DEPS phi_context cpu_context) cc_test( test_meta_fn_utils SRCS test_meta_fn_utils.cc diff --git a/paddle/phi/tests/core/test_device_context.cc b/paddle/phi/tests/core/test_device_context.cc deleted file mode 100644 index 844330ee097..00000000000 --- a/paddle/phi/tests/core/test_device_context.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "gtest/gtest.h" - -// TODO(wilber): will remove after the cpu, gpu context megre. -#include "paddle/phi/backends/cpu/cpu_context.h" -// #include "paddle/phi/backends/all_context.h" - -// NOTE: The paddle framework should add WITH_EIGEN option to support compile -// without eigen. -#include "unsupported/Eigen/CXX11/Tensor" - -namespace phi { -namespace tests { - -class InferenceCPUContext : public CPUContext { - public: - void SetEigenDevice(Eigen::DefaultDevice* eigen_device) { - CPUContext::SetEigenDevice(eigen_device); - } -}; - -TEST(DeviceContext, cpu_context) { - std::cout << "test training scenarios" << std::endl; - { - phi::CPUContext ctx; - ctx.Init(); - EXPECT_TRUE(ctx.eigen_device() != nullptr); - } - - std::cout << "test inference scenarios" << std::endl; - Eigen::DefaultDevice* device = new Eigen::DefaultDevice(); - { - InferenceCPUContext ctx; - ctx.SetEigenDevice(device); - EXPECT_TRUE(ctx.eigen_device() != nullptr); - } - delete device; -} - -} // namespace tests -} // namespace phi diff --git a/paddle/phi/tests/kernels/test_cast_dev_api.cc b/paddle/phi/tests/kernels/test_cast_dev_api.cc index 179e44f0f0f..d43cd075ed5 100644 --- a/paddle/phi/tests/kernels/test_cast_dev_api.cc +++ b/paddle/phi/tests/kernels/test_cast_dev_api.cc @@ -52,7 +52,6 @@ TEST(DEV_API, cast) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); phi::DataType out_dtype = phi::DataType::FLOAT64; // 2. test API diff --git a/paddle/phi/tests/kernels/test_concat_dev_api.cc b/paddle/phi/tests/kernels/test_concat_dev_api.cc index 0dd58b1bba9..9283fcd0b65 100644 --- a/paddle/phi/tests/kernels/test_concat_dev_api.cc +++ b/paddle/phi/tests/kernels/test_concat_dev_api.cc @@ -60,7 +60,6 @@ TEST(DEV_API, concat) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Concat(dev_ctx, inputs, 0); // 3. check result diff --git a/paddle/phi/tests/kernels/test_conj_dev_api.cc b/paddle/phi/tests/kernels/test_conj_dev_api.cc index 5ac676ffcbc..2f7ab838373 100644 --- a/paddle/phi/tests/kernels/test_conj_dev_api.cc +++ b/paddle/phi/tests/kernels/test_conj_dev_api.cc @@ -48,7 +48,6 @@ TEST(DEV_API, conj) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); // 2. 
test API auto out = phi::Conj(dev_ctx, dense_x); diff --git a/paddle/phi/tests/kernels/test_copy_dev_api.cc b/paddle/phi/tests/kernels/test_copy_dev_api.cc index 1c9b17ed613..c2df0a8acdc 100644 --- a/paddle/phi/tests/kernels/test_copy_dev_api.cc +++ b/paddle/phi/tests/kernels/test_copy_dev_api.cc @@ -65,7 +65,6 @@ TEST(DEV_API, copy) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); phi::Copy( dev_ctx, *(dense_src.get()), phi::CPUPlace(), false, dense_dst.get()); diff --git a/paddle/phi/tests/kernels/test_creation_dev_api.cc b/paddle/phi/tests/kernels/test_creation_dev_api.cc index 2dcd8739991..5685c3a2a0b 100644 --- a/paddle/phi/tests/kernels/test_creation_dev_api.cc +++ b/paddle/phi/tests/kernels/test_creation_dev_api.cc @@ -36,7 +36,6 @@ TEST(DEV_API, empty) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); // 2. test API auto out = phi::Empty(dev_ctx, {3, 2}); @@ -66,7 +65,6 @@ TEST(DEV_API, empty_like) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::EmptyLike(dev_ctx, dense_x); // 3. check result @@ -86,7 +84,6 @@ TEST(DEV_API, full) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Full(dev_ctx, {3, 2}, val); // 3. check result @@ -119,7 +116,6 @@ TEST(DEV_API, full_like) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); // 2. test API auto out = phi::FullLike(dev_ctx, dense_x, val); diff --git a/paddle/phi/tests/kernels/test_dot_dev_api.cc b/paddle/phi/tests/kernels/test_dot_dev_api.cc index de20907cadf..a2af0471df0 100644 --- a/paddle/phi/tests/kernels/test_dot_dev_api.cc +++ b/paddle/phi/tests/kernels/test_dot_dev_api.cc @@ -61,7 +61,6 @@ TEST(DEV_API, dot) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Dot(dev_ctx, dense_x, dense_y); // 3. check result diff --git a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc index 63f8b86a534..4100889d3ac 100644 --- a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc @@ -66,7 +66,6 @@ TEST(DEV_API, add) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto dense_out = phi::Add(dev_ctx, dense_x, dense_y); // 3. check result @@ -118,7 +117,6 @@ TEST(DEV_API, subtract) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto dense_out = phi::Subtract(dev_ctx, dense_x, dense_y); // 3. check result @@ -170,7 +168,6 @@ TEST(DEV_API, divide) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto dense_out = phi::Divide(dev_ctx, dense_x, dense_y); // 3. 
check result @@ -222,7 +219,6 @@ TEST(DEV_API, multiply) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto dense_out = phi::Multiply(dev_ctx, dense_x, dense_y); // 3. check result diff --git a/paddle/phi/tests/kernels/test_flatten_dev_api.cc b/paddle/phi/tests/kernels/test_flatten_dev_api.cc index fb1cdee7e5f..860af4c4a4d 100644 --- a/paddle/phi/tests/kernels/test_flatten_dev_api.cc +++ b/paddle/phi/tests/kernels/test_flatten_dev_api.cc @@ -52,7 +52,6 @@ TEST(DEV_API, flatten) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); // 2. test API auto out = phi::Flatten(dev_ctx, dense_x, start_axis, stop_axis); diff --git a/paddle/phi/tests/kernels/test_math_function.cc b/paddle/phi/tests/kernels/test_math_function.cc index 29f33c555d1..a13a8cb564f 100644 --- a/paddle/phi/tests/kernels/test_math_function.cc +++ b/paddle/phi/tests/kernels/test_math_function.cc @@ -273,7 +273,6 @@ TEST(math_funciton, set_constant) { t.Resize({10, 10}); t.mutable_data(paddle::platform::CPUPlace()); auto* ctx = new paddle::platform::CPUDeviceContext(); - ctx->Init(); phi::funcs::set_constant(*ctx, &t, 10); for (int64_t i = 0; i < t.numel(); ++i) { PADDLE_ENFORCE_EQ(10, diff --git a/paddle/phi/tests/kernels/test_matmul_dev_api.cc b/paddle/phi/tests/kernels/test_matmul_dev_api.cc index f25acaf9bcc..374a05fc5e4 100644 --- a/paddle/phi/tests/kernels/test_matmul_dev_api.cc +++ b/paddle/phi/tests/kernels/test_matmul_dev_api.cc @@ -58,7 +58,6 @@ TEST(DEV_API, dot) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = Matmul(dev_ctx, dense_x, dense_y, false, false); // 3. check result diff --git a/paddle/phi/tests/kernels/test_mean_dev_api.cc b/paddle/phi/tests/kernels/test_mean_dev_api.cc index 6f3f91a7dbe..1c791503913 100644 --- a/paddle/phi/tests/kernels/test_mean_dev_api.cc +++ b/paddle/phi/tests/kernels/test_mean_dev_api.cc @@ -51,7 +51,6 @@ TEST(DEV_API, mean) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Mean(dev_ctx, dense_x, dims, false); // 3. check result diff --git a/paddle/phi/tests/kernels/test_reshape_dev_api.cc b/paddle/phi/tests/kernels/test_reshape_dev_api.cc index f0f521d57db..708b31cb9a9 100644 --- a/paddle/phi/tests/kernels/test_reshape_dev_api.cc +++ b/paddle/phi/tests/kernels/test_reshape_dev_api.cc @@ -54,7 +54,6 @@ TEST(DEV_API, reshape) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Reshape(dev_ctx, dense_x, shape); // 3. 
check result std::vector expect_shape = {12, 3}; diff --git a/paddle/phi/tests/kernels/test_scale_dev_api.cc b/paddle/phi/tests/kernels/test_scale_dev_api.cc index eff18bdeeca..57e186ab393 100644 --- a/paddle/phi/tests/kernels/test_scale_dev_api.cc +++ b/paddle/phi/tests/kernels/test_scale_dev_api.cc @@ -51,7 +51,6 @@ TEST(DEV_API, scale) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Scale(dev_ctx, dense_x, scale, bias, bias_after_scale); @@ -93,7 +92,6 @@ TEST(DEV_API, scale_host) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto out = phi::Scale(dev_ctx, dense_x, scale, bias, bias_after_scale); diff --git a/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc index d1c464e4b1c..51d1e67f5af 100644 --- a/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_activation_dev_api.cc @@ -42,7 +42,6 @@ TEST(DEV_API, sparse_relu) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); DenseTensor dense_x = phi::Empty(dev_ctx_cpu, diff --git a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc index bb84690cd07..f08c7b0872b 100644 --- a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc @@ -75,7 +75,6 @@ void TestConv3dBase(const std::vector& indices, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); const int in_channels = kernel_dims[3]; const int out_channels = kernel_dims[4]; diff --git a/paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc index 50848ae5f1c..cbac854d48e 100644 --- a/paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_elementwise_dev_api.cc @@ -113,7 +113,6 @@ TEST(DEV_API, sparse_elementwise_coo_kernel_double) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); auto coo_x = sparse::DenseToSparseCoo(dev_ctx_cpu, dense_x, sparse_dim); auto coo_y = sparse::DenseToSparseCoo(dev_ctx_cpu, dense_y, sparse_dim); @@ -159,7 +158,6 @@ TEST(DEV_API, sparse_elementwise_csr_kernel_float) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); auto csr_x = sparse::DenseToSparseCsr(dev_ctx_cpu, dense_x); auto csr_y = sparse::DenseToSparseCsr(dev_ctx_cpu, dense_y); @@ -357,7 +355,6 @@ TEST(DEV_API, sparse_elementwise_csr_grad_kernel_float) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); auto csr_x = sparse::DenseToSparseCsr(dev_ctx_cpu, dense_x); auto csr_y = sparse::DenseToSparseCsr(dev_ctx_cpu, dense_y); @@ -404,7 +401,6 @@ TEST(DEV_API, sparse_elementwise_coo_grad_kernel_double) { paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx_cpu.Init(); auto csr_x = sparse::DenseToSparseCoo(dev_ctx_cpu, dense_x, sparse_dim); auto csr_y = 
sparse::DenseToSparseCoo(dev_ctx_cpu, dense_y, sparse_dim); diff --git a/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc index 7d7cd1ceaf5..460dca59c71 100644 --- a/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_pool_dev_api.cc @@ -60,7 +60,6 @@ void TestMaxPoolBase(const std::vector& indices, paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) .get()); - dev_ctx_cpu.Init(); const int in_channels = x_dims[4]; const int out_channels = in_channels; diff --git a/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc index d4f1d6efb5d..70c9f4cfc61 100644 --- a/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sparse_utils_dev_api.cc @@ -88,7 +88,6 @@ void TestDenseToSparseCoo(const DenseTensor& dense_x, paddle::platform::CPUPlace()); phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) @@ -307,7 +306,6 @@ void TestSparseCsrToCoo(const DDim& dense_dims, // 1. test cpu phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) @@ -489,7 +487,6 @@ void TestCooToCsr(const DDim& dense_dims, // 1. test cpu phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) @@ -588,7 +585,6 @@ void TestDenseToSparseCsr(const DenseTensor& dense_x, const auto alloc = std::make_shared( paddle::platform::CPUPlace()); phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) @@ -701,7 +697,6 @@ void TestSparseCooToDense(const DDim& dense_dims, const int64_t non_zero_num, const int64_t sparse_dim) { phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) @@ -879,7 +874,6 @@ void TestSparseCsrToDense(const DDim& dense_dims, // 1. test cpu phi::CPUContext dev_ctx_cpu; - dev_ctx_cpu.Init(); dev_ctx_cpu.SetAllocator( paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(phi::CPUPlace()) diff --git a/paddle/phi/tests/kernels/test_split_dev_api.cc b/paddle/phi/tests/kernels/test_split_dev_api.cc index a358fcdf28d..0389ab7afba 100644 --- a/paddle/phi/tests/kernels/test_split_dev_api.cc +++ b/paddle/phi/tests/kernels/test_split_dev_api.cc @@ -40,7 +40,6 @@ TEST(DEV_API, split) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); auto* dense_x_data = dev_ctx.Alloc(&dense_x); for (size_t i = 0; i < 4; ++i) { diff --git a/paddle/phi/tests/kernels/test_sum_dev_api.cc b/paddle/phi/tests/kernels/test_sum_dev_api.cc index 2cd677373f4..20e934eb692 100644 --- a/paddle/phi/tests/kernels/test_sum_dev_api.cc +++ b/paddle/phi/tests/kernels/test_sum_dev_api.cc @@ -49,7 +49,6 @@ TEST(DEV_API, sum) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - dev_ctx.Init(); // 2. test API auto out = -- GitLab
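
Editor's note (appended after the patch, not part of the original commit): the sketch below illustrates the behavioral change this commit makes to phi::CPUContext, reconstructed from the hunks above — the constructor now calls impl_->Init() itself, the public Init() method is removed, and the deleted test_device_context.cc showed the training vs. inference setup. This is a hedged usage sketch under those assumptions, not code taken from the repository; the function names and the InferenceCPUContext helper are illustrative.

#include <iostream>

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "unsupported/Eigen/CXX11/Tensor"

// Training scenario: after this patch the constructor initializes the
// context's resources, so no explicit Init() call is needed (or possible,
// since the public Init() interface was removed from cpu_context.h).
void TrainingScenario() {
  phi::CPUContext ctx;
  std::cout << "eigen_device ready: " << (ctx.eigen_device() != nullptr)
            << std::endl;  // expected: 1, as the deleted test asserted
}

// Inference scenario: external users still inject resources through the
// protected Set* hooks, mirroring the subclass in the deleted
// test_device_context.cc.
class InferenceCPUContext : public phi::CPUContext {
 public:
  void SetEigenDevice(Eigen::DefaultDevice* eigen_device) {
    phi::CPUContext::SetEigenDevice(eigen_device);
  }
};

void InferenceScenario() {
  Eigen::DefaultDevice eigen_device;
  InferenceCPUContext ctx;
  ctx.SetEigenDevice(&eigen_device);
  std::cout << "eigen_device ready: " << (ctx.eigen_device() != nullptr)
            << std::endl;  // expected: 1
}

In the kernel and test files touched above, the visible consequence is simply that the scattered dev_ctx.Init() / dev_ctx_cpu.Init() calls following SetAllocator(...) are dropped, while the SetAllocator(...) calls themselves are unchanged.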