Unverified · Commit 755438a7 · authored by Leo Chen · committed by GitHub

unify cpu context, part2 (#44012)

* fix init()

* delete test_device_context

* replace CPUDeviceContext with CPUContext

* fix test_scalar

* remove dot_op.cc

* fix compile
Parent 09096aeb
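Every hunk below applies the same mechanical substitution: the fluid-era `paddle::platform::CPUDeviceContext` becomes the unified `phi::CPUContext`. A minimal before/after sketch of the pattern, built only from calls that appear in the hunks themselves (illustrative, not a new API):

    // Before: callers downcast the pooled device context to the fluid CPU type.
    // auto *ctx = static_cast<paddle::platform::CPUDeviceContext *>(
    //     paddle::platform::DeviceContextPool::Instance().Get(place));

    // After: the same pool lookup, but every cast, BLAS helper, and kernel
    // registration now names the unified phi type instead.
    auto *ctx = static_cast<phi::CPUContext *>(
        paddle::platform::DeviceContextPool::Instance().Get(place));
    auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(*ctx);

The change is type-level: the pool still returns the same context object, so behavior should be unaffected.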
@@ -251,7 +251,7 @@ void EagerGroup::ConcatTensors(const platform::Place &place) {
         "Please recompile or reinstall Paddle with NCCL support."));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
+    auto *default_ctx = static_cast<phi::CPUContext *>(
         platform::DeviceContextPool::Instance().Get(place));
     ConcatTensorsWithType(
         *default_ctx, dense_tensors_, &dense_contents_, dtype_);
@@ -274,7 +274,7 @@ void EagerGroup::SplitTensors(const platform::Place &place) {
         "Please recompile or reinstall Paddle with NCCL support."));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
+    auto *default_ctx = static_cast<phi::CPUContext *>(
         platform::DeviceContextPool::Instance().Get(place));
     SplitTensorsWithType(
         *default_ctx, &dense_contents_, &dense_tensors_, dtype_);
@@ -891,7 +891,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
         "Please recompile or reinstall Paddle with NCCL support."));
 #endif
   } else if (platform::is_cpu_place(inner_place_)) {
-    dev_ctx = static_cast<platform::CPUDeviceContext *>(
+    dev_ctx = static_cast<phi::CPUContext *>(
         platform::DeviceContextPool::Instance().Get(inner_place_));
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
...
@@ -31,9 +31,9 @@ namespace paddle {
 namespace distributed {
 template <typename T>
-inline phi::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
-  paddle::platform::CPUDeviceContext cpu_ctx;
-  return phi::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
+inline phi::funcs::BlasT<phi::CPUContext, T> GetBlas() {
+  phi::CPUContext cpu_ctx;
+  return phi::funcs::GetBlas<phi::CPUContext, T>(cpu_ctx);
 }
 template <typename T>
...
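With the unified context type, the helper above is used exactly as before; a short usage sketch (illustrative, reusing the `AXPY` call that appears in the gradient-accumulator hunk further down):

    // y += 1.0 * x over n elements, via the phi BLAS wrapper.
    auto blas = GetBlas<float>();
    blas.AXPY(n, 1.0f, x, y);

Here `n`, `x`, and `y` are hypothetical: an element count and two caller-owned float buffers.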
@@ -353,11 +353,12 @@ void Communicator::RpcRecvSparse(const std::string &varname,
   bool training = true;
-  auto status = _worker_ptr->PullSparseParam((float **)push_g_vec.data(),
-                                             table_id,  // NOLINT
-                                             sparse_push_keys.data(),
-                                             sparse_push_keys.size(),
-                                             training);
+  auto status =
+      _worker_ptr->PullSparseParam(static_cast<float **>(push_g_vec.data()),
+                                   table_id,
+                                   sparse_push_keys.data(),
+                                   sparse_push_keys.size(),
+                                   training);
   status.wait();
   return;
 }
@@ -1184,12 +1185,12 @@ void GeoCommunicator::SendDense(const CommContext &send_ctx) {
   auto &t_latest = var_latest->Get<framework::LoDTensor>();
   auto t_timestamp = var_timestamp->GetMutable<framework::LoDTensor>();
-  paddle::platform::CPUDeviceContext cpu_ctx;
+  phi::CPUContext cpu_ctx;
   auto *var_delta = delta_scope_->Var(varname);
   auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
   t_delta->mutable_data<float>(t_latest.dims(), cpu_ctx.GetPlace());
-  auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+  auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
   blas.VSUB(t_latest.numel(),
             t_latest.data<float>(),
             t_timestamp->data<float>(),
@@ -1218,7 +1219,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
   RpcRecvDense(varnames, table_id, pserver_scope_.get());
   // 2.1 pserver - old => delta; 2.2 latest + old => latest 2.3 old => pserver
-  paddle::platform::CPUDeviceContext cpu_ctx;
+  phi::CPUContext cpu_ctx;
   for (auto &varname : varnames) {
     auto *var_latest = recv_scope_->FindVar(varname);
     auto t_latest = var_latest->GetMutable<framework::LoDTensor>();
@@ -1233,7 +1234,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
     auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
     t_delta->mutable_data<float>(t_latest->dims(), cpu_ctx.GetPlace());
-    auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+    auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
     blas.VSUB(t_latest->numel(),
               t_pserver.data<float>(),
               t_old->data<float>(),
@@ -1334,7 +1335,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
   auto *t_old = var_old->GetMutable<framework::LoDTensor>();
   auto dims1 = t_latest.dims()[1];
-  paddle::platform::CPUDeviceContext cpu_ctx;
+  phi::CPUContext cpu_ctx;
   auto *var_delta = delta_scope_->Var(varname);
   auto *t_delta = var_delta->GetMutable<phi::SelectedRows>();
@@ -1345,7 +1346,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
   t_delta->set_rows(sparse_ids);
   t_delta->set_height(t_latest.dims()[0]);
-  auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+  auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
   float coefficient = 1.0 / static_cast<float>(trainers_);
   std::vector<float *> push_g_vec;
@@ -1419,8 +1420,8 @@ void GeoCommunicator::RecvSparse(const std::string &varname,
   std::vector<float> v_delta;
   v_delta.resize(numel);
-  paddle::platform::CPUDeviceContext cpu_ctx;
-  auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+  phi::CPUContext cpu_ctx;
+  auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
   for (auto j = 0; j < static_cast<int>(keys.size()); ++j) {
     VLOG(5) << "DEBUG GeoCommunicator::RecvSparse recv sparse key" << keys[j]
...
@@ -185,9 +185,8 @@ inline void MergeVars(const std::string &var_name,
     }
     // set output tensor to 0.
-    paddle::platform::CPUDeviceContext cpu_ctx;
-    phi::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
-        constant_functor;
+    phi::CPUContext cpu_ctx;
+    phi::funcs::SetConstant<phi::CPUContext, T> constant_functor;
     constant_functor(cpu_ctx, out_t, static_cast<T>(0));
     // sum all vars to out
     auto result = EigenVector<T>::Flatten(*out_t);
@@ -210,16 +209,13 @@ inline void MergeVars(const std::string &var_name,
     for (auto &var : vars) {
       inputs.push_back(&var->Get<phi::SelectedRows>());
     }
-    paddle::platform::CPUDeviceContext dev_ctx;
+    phi::CPUContext dev_ctx;
     if (merge_add) {
-      paddle::operators::math::scatter::
-          MergeAdd<paddle::platform::CPUDeviceContext, T>
-              merge_add;
+      paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_add;
       merge_add(dev_ctx, inputs, out_slr);
     } else {
-      paddle::operators::math::scatter::
-          MergeAverage<paddle::platform::CPUDeviceContext, T>
-              merge_average;
+      paddle::operators::math::scatter::MergeAverage<phi::CPUContext, T>
+          merge_average;
       merge_average(dev_ctx, inputs, out_slr);
     }
...
@@ -48,8 +48,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor) {
 #endif
       return;
     }
-    paddle::framework::details::tensor_check<
-        paddle::platform::CPUDeviceContext>(
+    paddle::framework::details::tensor_check<phi::CPUContext>(
         api_name, tensor_name, *dense_tensor, place);
   }
 }
...
@@ -90,9 +90,8 @@ REGISTER_OP_WITHOUT_GRADIENT(
     test_op,
     paddle::framework::TestOpWithKernel,
     paddle::framework::OpKernelTestProtoAndCheckerMaker);
-REGISTER_OP_CPU_KERNEL(
-    test_op,
-    paddle::framework::TestKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(test_op,
+                       paddle::framework::TestKernel<phi::CPUContext, float>);
 REGISTER_OP_CUDA_KERNEL(
     test_op,
     paddle::framework::TestKernel<paddle::platform::CUDADeviceContext, float>);
...
@@ -44,8 +44,8 @@ void CastDataLayout::apply() {
   auto place = ctx_->GetPlace();
   if (platform::is_cpu_place(place)) {
-    phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans4;
-    auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
+    phi::funcs::Transpose<phi::CPUContext, T, 4> trans4;
+    auto* context = static_cast<const phi::CPUContext*>(ctx_);
     trans4(*context, in_, out_, axis_);
   } else {
     PADDLE_THROW(platform::errors::PreconditionNotMet(
...
@@ -94,8 +94,8 @@ struct CastDataType {
     auto* out_begin = out_->mutable_data<OutType>(in_.place());
     if (platform::is_cpu_place(in_.place())) {
-      platform::Transform<platform::CPUDeviceContext> trans;
-      auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
+      platform::Transform<phi::CPUContext> trans;
+      auto* context = static_cast<const phi::CPUContext*>(ctx_);
       trans(*context,
             in_begin,
             in_end,
...
@@ -117,7 +117,7 @@ struct TestBroadcastOpHandle {
     for (int i = 0; i < count; ++i) {
       auto p = p::CPUPlace();
       place_list_.push_back(p);
-      ctxs_.emplace_back(new p::CPUDeviceContext(p));
+      ctxs_.emplace_back(new phi::CPUContext(p));
     }
 #if defined(PADDLE_WITH_XPU_BKCL)
     bkcl_ctxs_.reset(nullptr);
...
@@ -69,7 +69,7 @@ struct TestGatherOpHandle {
       for (int i = 0; i < count; ++i) {
         auto p = p::CPUPlace();
         gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CPUDeviceContext(p));
+        ctxs_.emplace_back(new phi::CPUContext(p));
       }
     }
   }
...
@@ -316,7 +316,7 @@ template <>
 template <>
 template <typename T>
-void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
+void TensorCheckerVisitor<phi::CPUContext>::apply(
     typename std::enable_if<
         std::is_floating_point<T>::value ||
         std::is_same<T, ::paddle::platform::complex<float>>::value ||
@@ -329,11 +329,11 @@ void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
 }
 template <>
-void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
-                                              const std::string& var_name,
-                                              const framework::Tensor& tensor,
-                                              const platform::Place& place) {
-  TensorCheckerVisitor<platform::CPUDeviceContext> vistor(
+void tensor_check<phi::CPUContext>(const std::string& op_type,
+                                   const std::string& var_name,
+                                   const framework::Tensor& tensor,
+                                   const platform::Place& place) {
+  TensorCheckerVisitor<phi::CPUContext> vistor(
       op_type, var_name, tensor, place);
   VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
 }
@@ -439,7 +439,7 @@ void CheckVarHasNanOrInf(const std::string& op_type,
 #endif
     return;
   }
-  tensor_check<platform::CPUDeviceContext>(op_type, var_name, *tensor, place);
+  tensor_check<phi::CPUContext>(op_type, var_name, *tensor, place);
 }
 void CheckVarHasNanOrInf(const std::string& op_type,
...
@@ -81,7 +81,7 @@ struct TestReduceOpHandle {
       for (int i = 0; i < count; ++i) {
         auto p = p::CPUPlace();
         gpu_list_.push_back(p);
-        ctxs_.emplace_back(new p::CPUDeviceContext(p));
+        ctxs_.emplace_back(new phi::CPUContext(p));
       }
 #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
       nccl_ctxs_.reset(nullptr);
...
@@ -144,7 +144,7 @@ LoDTensor LodExpand(const LoDTensor& source,
       auto slice = tensor.Slice(elem, elem + 1);
       TensorCopy(source.Slice(ins, ins + 1),
                  platform::CPUPlace(),
-                 platform::CPUDeviceContext(),
+                 phi::CPUContext(),
                  &slice);
     }
   }
...
@@ -232,9 +232,8 @@ class OpKernelTest : public paddle::framework::OpKernel<T> {
 REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel,
                              paddle::framework::OpWithKernelTest,
                              paddle::framework::OpKernelTestMaker);
-REGISTER_OP_CPU_KERNEL(
-    op_with_kernel,
-    paddle::framework::OpKernelTest<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(op_with_kernel,
+                       paddle::framework::OpKernelTest<phi::CPUContext, float>);
 REGISTER_OP_CUDA_KERNEL(
     op_with_kernel,
@@ -264,10 +263,9 @@ TEST(OperatorRegistrar, CUDA) {
 }
 static int op_test_value = 0;
-using paddle::platform::CPUDeviceContext;
 using paddle::platform::CUDADeviceContext;
 using paddle::platform::DeviceContext;
+using phi::CPUContext;
 namespace paddle {
 namespace framework {
@@ -295,8 +293,7 @@ class OpMultiKernelTest : public paddle::framework::OpKernel<T> {
 };
 template <typename T>
-class OpMultiKernelTest<CPUDeviceContext, T>
-    : public paddle::framework::OpKernel<T> {
+class OpMultiKernelTest<CPUContext, T> : public paddle::framework::OpKernel<T> {
  public:
   void Compute(const paddle::framework::ExecutionContext& ctx) const {
     ++op_test_value;
@@ -319,7 +316,7 @@ class OpMultiKernelTest2 : public paddle::framework::OpKernel<T> {
 };
 template <typename T>
-class OpMultiKernelTest2<CPUDeviceContext, T>
+class OpMultiKernelTest2<CPUContext, T>
     : public paddle::framework::OpKernel<T> {
  public:
   void Compute(const paddle::framework::ExecutionContext& ctx) const {
@@ -342,16 +339,14 @@ class OpMultiKernelTest2<CUDADeviceContext, T>
 REGISTER_OP_WITHOUT_GRADIENT(op_with_multi_kernel,
                              paddle::framework::OpWithMultiKernelTest,
                              paddle::framework::OpKernelTestMaker);
-REGISTER_OP_KERNEL(
-    op_with_multi_kernel,
-    CPU,
-    paddle::platform::CPUPlace,
-    paddle::framework::OpMultiKernelTest<CPUDeviceContext, float>);
-REGISTER_OP_KERNEL(
-    op_with_multi_kernel,
-    MKLDNN,
-    paddle::platform::CPUPlace,
-    paddle::framework::OpMultiKernelTest2<CPUDeviceContext, float>);
+REGISTER_OP_KERNEL(op_with_multi_kernel,
+                   CPU,
+                   paddle::platform::CPUPlace,
+                   paddle::framework::OpMultiKernelTest<CPUContext, float>);
+REGISTER_OP_KERNEL(op_with_multi_kernel,
+                   MKLDNN,
+                   paddle::platform::CPUPlace,
+                   paddle::framework::OpMultiKernelTest2<CPUContext, float>);
 REGISTER_OP_KERNEL(
     op_with_multi_kernel,
     CUDA,
...
@@ -420,16 +420,13 @@ REGISTER_OP_WITHOUT_GRADIENT(
 REGISTER_OP_CPU_KERNEL(
     indicate_lod_tensor_data_type_test,
-    paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
-                                       int>);
+    paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
 REGISTER_OP_CPU_KERNEL(
     indicate_selected_rows_data_type_test,
-    paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
-                                       int>);
+    paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
 REGISTER_OP_CPU_KERNEL(
     indicate_other_data_type_test,
-    paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
-                                       int>);
+    paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
 TEST(IndicateVarDataTypeTest, lodtensor) {
   paddle::framework::InitDevices();
@@ -599,16 +596,14 @@ REGISTER_OP_WITHOUT_GRADIENT(get_lod_level_test,
                              paddle::framework::GetSetLoDLevelTestMaker);
 REGISTER_OP_CPU_KERNEL(
     get_lod_level_test,
-    paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
-                                       float>);
+    paddle::framework::EmptyTestKernel<phi::CPUContext, float>);
 REGISTER_OP_WITHOUT_GRADIENT(set_lod_level_test,
                              paddle::framework::SetLoDLevelTest,
                              paddle::framework::GetSetLoDLevelTestMaker);
 REGISTER_OP_CPU_KERNEL(
     set_lod_level_test,
-    paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
-                                       float>);
+    paddle::framework::EmptyTestKernel<phi::CPUContext, float>);
 void SetGetLoDLevelTestMain(std::string op_type) {
   paddle::framework::InitDevices({});
...
@@ -66,7 +66,7 @@ struct ConvertToPhiContext {
 };
 template <>
-struct ConvertToPhiContext<platform::CPUDeviceContext> {
+struct ConvertToPhiContext<phi::CPUContext> {
   using TYPE = phi::CPUContext;
 };
...
@@ -53,7 +53,7 @@ TEST_F(SelectedRowsTester, complete_dims) {
 TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
   phi::SelectedRows dst_tensor;
-  platform::CPUDeviceContext cpu_ctx(place_);
+  phi::CPUContext cpu_ctx(place_);
   std::ostringstream oss;
   SerializeToStream(oss, *selected_rows_, cpu_ctx);
...
@@ -1253,7 +1253,7 @@ void TensorFromStream(std::istream& is,
       is.seekg(seekg, is.cur);
       void* buf;
-      platform::CPUDeviceContext ctx;
+      phi::CPUContext ctx;
       size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
       if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
           platform::is_xpu_place(dev_ctx.GetPlace()) ||
@@ -1336,7 +1336,7 @@ void TensorFromStream(std::istream& is,
     std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
     tensor->Resize(phi::make_ddim(dims));
     void* buf;
-    platform::CPUDeviceContext ctx;
+    phi::CPUContext ctx;
     size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
     if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
         platform::is_xpu_place(dev_ctx.GetPlace()) ||
...
@@ -24,7 +24,7 @@ namespace framework {
 TEST(TensorCopy, Tensor) {
   Tensor src_tensor;
   Tensor dst_tensor;
-  platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));
+  phi::CPUContext cpu_ctx((platform::CPUPlace()));
   int* src_ptr = src_tensor.mutable_data<int>(phi::make_ddim({3, 3}),
                                               platform::CPUPlace());
@@ -164,7 +164,7 @@ TEST(TensorFromVector, Tensor) {
     // Copy to CPU Tensor
     cpu_tensor.Resize(phi::make_ddim({3, 3}));
     auto cpu_place = new paddle::platform::CPUPlace();
-    paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
+    phi::CPUContext cpu_ctx(*cpu_place);
     paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
     // Copy to GPUTensor
@@ -255,20 +255,23 @@ TEST(TensorToVector, Tensor) {
 #endif
 }
-TEST(TensorToVector, Tensor_bool){{paddle::framework::Tensor src;
-bool* src_ptr = src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
-for (int i = 0; i < 3 * 3; ++i) {
-  src_ptr[i] = static_cast<bool>(i % 2);
-}
-paddle::platform::CPUPlace place;
-std::vector<bool> dst;
-paddle::framework::TensorToVector<bool>(src, &dst);
-for (int i = 0; i < 3 * 3; ++i) {
-  EXPECT_EQ(src_ptr[i], dst[i]);
-}
-}  // namespace framework
+TEST(TensorToVector, Tensor_bool) {
+  {
+    paddle::framework::Tensor src;
+    bool* src_ptr =
+        src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
+    for (int i = 0; i < 3 * 3; ++i) {
+      src_ptr[i] = static_cast<bool>(i % 2);
+    }
+    paddle::platform::CPUPlace place;
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(src, &dst);
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_ptr[i], dst[i]);
+    }
+  }
 #ifdef PADDLE_WITH_CUDA
   {
     std::vector<bool> src_vec = {
@@ -325,7 +328,7 @@ for (int i = 0; i < 3 * 3; ++i) {
     }
   }
 #endif
-}  // namespace paddle
+}
 TEST(TensorFromDLPack, Tensor) {
   {
@@ -334,7 +337,7 @@ TEST(TensorFromDLPack, Tensor) {
     cpu_tensor.Resize(phi::make_ddim({3, 3}));
     paddle::platform::CPUPlace cpu_place;
-    paddle::platform::CPUDeviceContext cpu_ctx(cpu_place);
+    phi::CPUContext cpu_ctx(cpu_place);
     paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
     paddle::framework::DLPackTensor dlpack_tensor(cpu_tensor, 1);
@@ -360,7 +363,7 @@ TEST(TensorFromDLPack, Tensor) {
     // Copy to CPU Tensor
     cpu_tensor.Resize(phi::make_ddim({3, 3}));
     paddle::platform::CPUPlace cpu_place;
-    paddle::platform::CPUDeviceContext cpu_ctx(cpu_place);
+    phi::CPUContext cpu_ctx(cpu_place);
     paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
     // Copy to GPUTensor
@@ -502,7 +505,7 @@ TEST(Tensor, FromAndToStream) {
   {
     framework::Tensor dst_tensor;
     auto place = new platform::CPUPlace();
-    platform::CPUDeviceContext cpu_ctx(*place);
+    phi::CPUContext cpu_ctx(*place);
     std::ostringstream oss;
     TensorToStream(oss, src_tensor, cpu_ctx);
...
@@ -46,8 +46,8 @@ void GLOOParallelContext::Init() {
   int port = std::stoi(addr[1]);
   gloo_wrapper->SetHttpStore(host, port, "worker");
   gloo_wrapper->Init();
-  device_ = std::unique_ptr<platform::CPUDeviceContext>(
-      new platform::CPUDeviceContext(platform::CPUPlace()));
+  device_ = std::unique_ptr<phi::CPUContext>(
+      new phi::CPUContext(platform::CPUPlace()));
   device_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                             .GetAllocator(platform::CPUPlace())
                             .get());
@@ -200,7 +200,7 @@ void GLOOParallelContext::Broadcast(framework::Variable *src, int ring_id) {
 paddle::platform::DeviceContext *GLOOParallelContext::GetDeviceContext(
     int ring_id) {
-  // return the CPUDeviceContext
+  // return the CPUContext
   return device_.get();
 }
...
@@ -64,7 +64,7 @@ class GLOOParallelContext : public ParallelContext {
   void AllReduce(const phi::SelectedRows& src, phi::SelectedRows* dst);
  private:
-  std::unique_ptr<platform::CPUDeviceContext> device_;
+  std::unique_ptr<phi::CPUContext> device_;
 };
 }  // namespace imperative
...
@@ -85,9 +85,9 @@ class TensorAddFunctor : public boost::static_visitor<> {
       : numel_(numel), x_(x), y_(y) {}
   void operator()(const platform::CPUPlace& place) const {
-    platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
+    phi::CPUContext* ctx = dynamic_cast<phi::CPUContext*>(
         platform::DeviceContextPool::Instance().Get(place));
-    auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, T>(*ctx);
+    auto blas = phi::funcs::GetBlas<phi::CPUContext, T>(*ctx);
     blas.AXPY(numel_, 1., x_, y_);
   }
@@ -438,7 +438,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
           place));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    return TensorAddImpl<platform::CPUDeviceContext, platform::float16>(
+    return TensorAddImpl<phi::CPUContext, platform::float16>(
         src_tensor, dst_tensor, place);
   }
 }
@@ -455,7 +455,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
           place));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    return TensorAddImpl<platform::CPUDeviceContext, platform::bfloat16>(
+    return TensorAddImpl<phi::CPUContext, platform::bfloat16>(
         src_tensor, dst_tensor, place);
   }
 }
@@ -498,8 +498,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
     PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CUDADeviceContext, double);
   } else {
 #endif
-    PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CPUDeviceContext, float);
-    PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CPUDeviceContext, double);
+    PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(phi::CPUContext, float);
+    PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(phi::CPUContext, double);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   }
 #endif
@@ -550,8 +550,8 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
     PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CUDADeviceContext, double);
   } else {
 #endif
-    PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CPUDeviceContext, float);
-    PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CPUDeviceContext, double);
+    PADDLE_SELECTED_ROWS_ADD_TENSOR(phi::CPUContext, float);
+    PADDLE_SELECTED_ROWS_ADD_TENSOR(phi::CPUContext, double);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   }
 #endif
@@ -613,8 +613,8 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
     PADDLE_SELECTED_ROWS_ADD(platform::CUDADeviceContext, double);
   } else {
 #endif
-    PADDLE_SELECTED_ROWS_ADD(platform::CPUDeviceContext, float);
-    PADDLE_SELECTED_ROWS_ADD(platform::CPUDeviceContext, double);
+    PADDLE_SELECTED_ROWS_ADD(phi::CPUContext, float);
+    PADDLE_SELECTED_ROWS_ADD(phi::CPUContext, double);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   }
 #endif
...
@@ -53,12 +53,11 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
     }
     framework::VisitDataTypeForHIP(
         dtype_,
-        DivNRanksForAllReduce<platform::CPUDeviceContext>(
-            tensor, nranks, context));
+        DivNRanksForAllReduce<phi::CPUContext>(tensor, nranks, context));
 #else
-    framework::VisitDataType(dtype_,
-                             DivNRanksForAllReduce<platform::CPUDeviceContext>(
-                                 tensor, nranks, context));
+    framework::VisitDataType(
+        dtype_,
+        DivNRanksForAllReduce<phi::CPUContext>(tensor, nranks, context));
 #endif
     VLOG(4) << "after div 2" << *tensor;
   } else if (platform::is_xpu_place(tensor->place())) {
@@ -328,11 +327,10 @@ void Group::ConcatTensors(const platform::DeviceContext &context) {
         "Please recompile or reinstall Paddle with CNCL support."));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    ConcatTensorsWithType(
-        static_cast<const platform::CPUDeviceContext &>(context),
-        dense_tensors_,
-        &dense_contents_,
-        dtype_);
+    ConcatTensorsWithType(static_cast<const phi::CPUContext &>(context),
+                          dense_tensors_,
+                          &dense_contents_,
+                          dtype_);
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "Concat grad tensor not supported on place (%s)", place));
@@ -390,11 +388,10 @@ void Group::SplitTensors(const platform::DeviceContext &context) {
         "Please recompile or reinstall Paddle with CNCL support."));
 #endif
   } else if (platform::is_cpu_place(place)) {
-    SplitTensorsWithType(
-        static_cast<const platform::CPUDeviceContext &>(context),
-        &dense_contents_,
-        &dense_tensors_,
-        dtype_);
+    SplitTensorsWithType(static_cast<const phi::CPUContext &>(context),
+                         &dense_contents_,
+                         &dense_tensors_,
+                         dtype_);
   } else {
     PADDLE_THROW(platform::errors::Unimplemented(
         "Split grad tensor not supported on place (%s)", place));
...
@@ -234,7 +234,7 @@ void LiteSubgraphPass::SetUpEngine(
     framework::Scope* scope,
     const std::vector<std::string>& params) {
   std::ostringstream os;
-  platform::CPUDeviceContext ctx;
+  phi::CPUContext ctx;
   for (const auto& param : params) {
     VLOG(3) << "Serialize param: " << param;
     PADDLE_ENFORCE_NOT_NULL(
...
@@ -365,7 +365,7 @@ void ConvertToMixedPrecision(const std::string& model_file,
       [](framework::Scope* scope,
          const std::vector<std::string>& params) -> std::string {
         std::ostringstream os;
-        platform::CPUDeviceContext ctx;
+        phi::CPUContext ctx;
         for (const auto& param : params) {
           VLOG(3) << "Serialize param: " << param;
           PADDLE_ENFORCE_NOT_NULL(
...
@@ -81,7 +81,7 @@ void make_fake_model(std::string* model, std::string* param) {
   ctx.PartialInitWithAllocator();
 #else
   platform::CPUPlace place;
-  platform::CPUDeviceContext ctx(place);
+  phi::CPUContext ctx(place);
 #endif
   // Prepare variables.
   std::vector<std::string> repetitive_params{"x", "y"};
...
@@ -62,7 +62,7 @@ void IOConverterTester(const platform::DeviceContext& ctx) {
 TEST(EngineIOConverterTester, DefaultCPU) {
   platform::CPUPlace place;
-  platform::CPUDeviceContext ctx(place);
+  phi::CPUContext ctx(place);
   IOConverterTester(ctx);
 }
...
@@ -1469,20 +1469,16 @@ namespace plat = paddle::platform;
                     ops::ActivationOpGrad,                                   \
                     ops::ActivationGradOpInplaceInferer);
 #define REGISTER_ACTIVATION_CPU_KERNEL(                                      \
     act_type, op_name, functor, grad_functor)                                \
   REGISTER_OP_CPU_KERNEL(                                                    \
       act_type,                                                              \
-      ops::ActivationKernel<paddle::platform::CPUDeviceContext,              \
-                            ops::functor<float>>,                            \
-      ops::ActivationKernel<paddle::platform::CPUDeviceContext,              \
-                            ops::functor<double>>);                          \
-  REGISTER_OP_CPU_KERNEL(                                                    \
-      act_type##_grad,                                                       \
-      ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,          \
-                                ops::grad_functor<float>>,                   \
-      ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,          \
-                                ops::grad_functor<double>>);
+      ops::ActivationKernel<phi::CPUContext, ops::functor<float>>,           \
+      ops::ActivationKernel<phi::CPUContext, ops::functor<double>>);         \
+  REGISTER_OP_CPU_KERNEL(                                                    \
+      act_type##_grad,                                                       \
+      ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<float>>,  \
+      ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<double>>);
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
 FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
...
@@ -122,12 +122,11 @@ REGISTER_OPERATOR(
     ops::AddPositionEncodingGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(add_position_encoding_grad, ops::AddPositionEncodingOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    add_position_encoding,
-    ops::AddPositionEncodingKernel<plt::CPUDeviceContext, float>,
-    ops::AddPositionEncodingKernel<plt::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(add_position_encoding,
+                       ops::AddPositionEncodingKernel<phi::CPUContext, float>,
+                       ops::AddPositionEncodingKernel<phi::CPUContext, double>);
 REGISTER_OP_CPU_KERNEL(
     add_position_encoding_grad,
-    ops::AddPositionEncodingGradKernel<plt::CPUDeviceContext, float>,
-    ops::AddPositionEncodingGradKernel<plt::CPUDeviceContext, double>);
+    ops::AddPositionEncodingGradKernel<phi::CPUContext, float>,
+    ops::AddPositionEncodingGradKernel<phi::CPUContext, double>);
@@ -342,7 +342,7 @@ DECLARE_INPLACE_OP_INFERER(AffineChannelGradInplaceInferer,
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 REGISTER_OPERATOR(affine_channel,
                   ops::AffineChannelOp,
...
@@ -28,7 +28,7 @@ namespace operators {
 using Tensor = framework::Tensor;
 template <typename T>
-struct Linspace<paddle::platform::CPUDeviceContext, T> {
+struct Linspace<phi::CPUContext, T> {
   void operator()(T start,
                   T end,
                   int count,
@@ -282,14 +282,12 @@ REGISTER_OPERATOR(affine_grid,
                   ops::AffineGridGradMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(affine_grid_grad, ops::AffineGridOpGrad);
-REGISTER_OP_CPU_KERNEL(
-    affine_grid,
-    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    affine_grid_grad,
-    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(affine_grid,
+                       ops::AffineGridOpKernel<phi::CPUContext, float>,
+                       ops::AffineGridOpKernel<phi::CPUContext, double>);
+REGISTER_OP_CPU_KERNEL(affine_grid_grad,
+                       ops::AffineGridGradOpKernel<phi::CPUContext, float>,
+                       ops::AffineGridGradOpKernel<phi::CPUContext, double>);
 REGISTER_OP_VERSION(affine_grid)
     .AddCheckpoint(
...
@@ -84,7 +84,7 @@ class AllcloseOpVarTypeInference : public framework::VarTypeInference {
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 DECLARE_INFER_SHAPE_FUNCTOR(allclose,
                             AllcloseInferShapeFunctor,
...
@@ -65,7 +65,7 @@ class AllocFloatStatusKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 REGISTER_OPERATOR(
     alloc_float_status,
...
@@ -95,7 +95,7 @@ template <typename T>
 class CheckFiniteAndUnscaleCpuKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const {
-    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
     const auto xs = ctx.MultiInput<framework::Tensor>("X");
     const auto* scale = ctx.Input<framework::Tensor>("Scale");
     auto outs = ctx.MultiOutput<framework::Tensor>("Out");
@@ -106,11 +106,10 @@ class CheckFiniteAndUnscaleCpuKernel : public framework::OpKernel<T> {
     *found_inf_data = false;
     framework::Tensor is_finite =
-        ctx.AllocateTmpTensor<bool, platform::CPUDeviceContext>({1}, dev_ctx);
+        ctx.AllocateTmpTensor<bool, phi::CPUContext>({1}, dev_ctx);
     bool* is_finite_data = is_finite.template data<bool>();
-    auto& dev = *ctx.template device_context<platform::CPUDeviceContext>()
-                     .eigen_device();
+    auto& dev = *ctx.template device_context<phi::CPUContext>().eigen_device();
     T inverse_scale = Inverse<T>(*scale_data);
     for (size_t i = 0; i < xs.size(); ++i) {
...
@@ -68,7 +68,7 @@ class ClearFloatStatusKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 REGISTER_OPERATOR(
     clear_float_status,
...
@@ -67,7 +67,7 @@ class GetFloatStatusKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 REGISTER_OPERATOR(
     get_float_status,
...
@@ -169,9 +169,9 @@ decr_every_n_nan_or_inf steps and each step some gradients are infinite.
 };
 template <typename T, bool IsFoundInfOnCPU>
-class UpdateLossScalingFunctor<platform::CPUDeviceContext, T, IsFoundInfOnCPU> {
+class UpdateLossScalingFunctor<phi::CPUContext, T, IsFoundInfOnCPU> {
  public:
-  void operator()(const platform::CPUDeviceContext& ctx,
+  void operator()(const phi::CPUContext& ctx,
                   const bool* found_inf_data,
                   const T* pre_loss_scaling_data,
                   const int* good_in_data,
@@ -203,9 +203,9 @@ class UpdateLossScalingFunctor<platform::CPUDeviceContext, T, IsFoundInfOnCPU> {
 };
 template <typename T>
-class LazyZeros<platform::CPUDeviceContext, T> {
+class LazyZeros<phi::CPUContext, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& dev_ctx,
+  void operator()(const phi::CPUContext& dev_ctx,
                   const bool* found_inf_data,
                   const std::vector<const framework::Tensor*>& xs,
                   const std::vector<framework::Tensor*>& outs) const {
@@ -225,7 +225,7 @@ class LazyZeros<platform::CPUDeviceContext, T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-using CPU = paddle::platform::CPUDeviceContext;
+using CPU = phi::CPUContext;
 REGISTER_OPERATOR(
     update_loss_scaling,
...
@@ -116,20 +116,16 @@ REGISTER_OPERATOR(angle,
 REGISTER_OP_CPU_KERNEL(
     angle,
-    ops::AngleKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::AngleKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::AngleKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::complex<float>>,
-    ops::AngleKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::complex<double>>);
+    ops::AngleKernel<phi::CPUContext, float>,
+    ops::AngleKernel<phi::CPUContext, double>,
+    ops::AngleKernel<phi::CPUContext, paddle::platform::complex<float>>,
+    ops::AngleKernel<phi::CPUContext, paddle::platform::complex<double>>);
 REGISTER_OPERATOR(angle_grad, ops::AngleGradOp);
 REGISTER_OP_CPU_KERNEL(
     angle_grad,
-    ops::AngleGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::AngleGradKernel<paddle::platform::CPUDeviceContext, double>,
-    ops::AngleGradKernel<paddle::platform::CPUDeviceContext,
-                         paddle::platform::complex<float>>,
-    ops::AngleGradKernel<paddle::platform::CPUDeviceContext,
-                         paddle::platform::complex<double>>);
+    ops::AngleGradKernel<phi::CPUContext, float>,
+    ops::AngleGradKernel<phi::CPUContext, double>,
+    ops::AngleGradKernel<phi::CPUContext, paddle::platform::complex<float>>,
+    ops::AngleGradKernel<phi::CPUContext, paddle::platform::complex<double>>);
@@ -51,7 +51,7 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> {
   void operator()(Place place) const {
     auto &pool = platform::DeviceContextPool::Instance();
     if (std::is_same<Place, platform::CPUPlace>::value) {
-      Apply(static_cast<platform::CPUDeviceContext *>(pool.Get(place)));
+      Apply(static_cast<phi::CPUContext *>(pool.Get(place)));
     } else {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
       Apply(static_cast<platform::CUDADeviceContext *>(pool.Get(place)));
...
@@ -22,7 +22,7 @@ limitations under the License. */
 TEST(AssignOp, AssignLoDTensor) {
   paddle::platform::CPUPlace cpu_place;
-  paddle::platform::CPUDeviceContext ctx(cpu_place);
+  phi::CPUContext ctx(cpu_place);
   paddle::framework::Variable output;
   paddle::operators::AssignFunctor assign_functor(&output, ctx);
@@ -47,7 +47,7 @@ TEST(AssignOp, AssignLoDTensor) {
 TEST(AssignOp, AssignLoDTensorArray) {
   paddle::platform::CPUPlace cpu_place;
-  paddle::platform::CPUDeviceContext ctx(cpu_place);
+  phi::CPUContext ctx(cpu_place);
   paddle::framework::Variable output;
   paddle::operators::AssignFunctor assign_functor(&output, ctx);
@@ -78,7 +78,7 @@ TEST(AssignOp, AssignLoDTensorArray) {
 TEST(AssignOp, AssignSelectedRows) {
   paddle::platform::CPUPlace cpu_place;
-  paddle::platform::CPUDeviceContext ctx(cpu_place);
+  phi::CPUContext ctx(cpu_place);
   paddle::framework::Variable output;
   paddle::operators::AssignFunctor assign_functor(&output, ctx);
...
@@ -337,7 +337,7 @@ template <typename T>
 class AttentionLSTMKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    using DeviceContext = paddle::platform::CPUDeviceContext;
+    using DeviceContext = phi::CPUContext;
     auto* x = ctx.Input<LoDTensor>("X");
     auto* h0 = ctx.Input<Tensor>("H0");
@@ -416,10 +416,10 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
     T* lstm_x_data = lstm_x->mutable_data<T>(ctx.GetPlace());
     T* lstm_out_data = lstm_out->mutable_data<T>(ctx.GetPlace());
-    auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, T>(ctx);
+    auto blas = phi::funcs::GetBlas<phi::CPUContext, T>(ctx);
     // x(TxM) * fc (Mx1) part of atten_wgt(M+D)x1
-    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
+    auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
     phi::funcs::FCFunctor<DeviceContext, T> fc;
     fc(dev_ctx,
        total_T,
...
@@ -18,11 +18,10 @@ namespace paddle {
 namespace operators {
 template <>
-void GetAccumulators<paddle::platform::CPUDeviceContext>(
-    const framework::ExecutionContext& ctx,
-    int64_t* num_updates,
-    int64_t* num_accumulates,
-    int64_t* old_num_accumulates) {
+void GetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
+                                      int64_t* num_updates,
+                                      int64_t* num_accumulates,
+                                      int64_t* old_num_accumulates) {
   auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
   auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
   auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
@@ -33,11 +32,10 @@ void GetAccumulators<paddle::platform::CPUDeviceContext>(
 }
 template <>
-void SetAccumulators<paddle::platform::CPUDeviceContext>(
-    const framework::ExecutionContext& ctx,
-    int64_t num_updates,
-    int64_t num_accumulates,
-    int64_t old_num_accumulates) {
+void SetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
+                                      int64_t num_updates,
+                                      int64_t num_accumulates,
+                                      int64_t old_num_accumulates) {
   auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
   auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
   auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
@@ -217,7 +215,6 @@ REGISTER_OPERATOR(
     ops::AverageAccumulatesOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(
-    average_accumulates,
-    ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(average_accumulates,
+                       ops::AverageAccumulatesKernel<phi::CPUContext, float>,
+                       ops::AverageAccumulatesKernel<phi::CPUContext, double>);
@@ -166,7 +166,6 @@ REGISTER_OPERATOR(batch_fc_grad,
                   ops::BatchFCGradOp,
                   ops::BatchFCGradOpNoNeedBufferVarsInferer);
-REGISTER_OP_CPU_KERNEL(
-    batch_fc,
-    ops::BatchFCKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::BatchFCKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(batch_fc,
+                       ops::BatchFCKernel<phi::CPUContext, float>,
+                       ops::BatchFCKernel<phi::CPUContext, double>);
...@@ -141,7 +141,7 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor( ...@@ -141,7 +141,7 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
auto cpu_place = std::unique_ptr<paddle::platform::CPUPlace>( auto cpu_place = std::unique_ptr<paddle::platform::CPUPlace>(
new paddle::platform::CPUPlace()); new paddle::platform::CPUPlace());
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place); phi::CPUContext cpu_ctx(*cpu_place);
framework::LoD lod; framework::LoD lod;
lod.push_back(source_level_lod); lod.push_back(source_level_lod);
......
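beam_search_decoder builds its CPU context locally instead of fetching a pooled one, and the hunk above shows phi::CPUContext keeps a Place-taking constructor, so only the type name changes at the call site. Trimmed sketch (assumes Paddle headers; the unique_ptr wrapper in the original is incidental, not required for the construction):

paddle::platform::CPUPlace cpu_place;
phi::CPUContext cpu_ctx(cpu_place);  // construct directly from a place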
...@@ -143,9 +143,8 @@ REGISTER_OPERATOR(beam_search, ...@@ -143,9 +143,8 @@ REGISTER_OPERATOR(beam_search,
ops::BeamSearchOp, ops::BeamSearchOp,
ops::BeamSearchOpMaker, ops::BeamSearchOpMaker,
ops::BeamSearchInferVarType); ops::BeamSearchInferVarType);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(beam_search,
beam_search, ops::BeamSearchOpKernel<phi::CPUContext, float>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, float>, ops::BeamSearchOpKernel<phi::CPUContext, double>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, double>, ops::BeamSearchOpKernel<phi::CPUContext, int>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, int>, ops::BeamSearchOpKernel<phi::CPUContext, int64_t>);
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -172,11 +172,9 @@ REGISTER_OPERATOR(bmm, ...@@ -172,11 +172,9 @@ REGISTER_OPERATOR(bmm,
ops::BmmOpGradMaker<paddle::framework::OpDesc>, ops::BmmOpGradMaker<paddle::framework::OpDesc>,
ops::BmmOpGradMaker<paddle::imperative::OpBase>); ops::BmmOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(bmm_grad, ops::BmmOpGrad); REGISTER_OPERATOR(bmm_grad, ops::BmmOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(bmm,
bmm, ops::BmmKernel<phi::CPUContext, float>,
ops::BmmKernel<paddle::platform::CPUDeviceContext, float>, ops::BmmKernel<phi::CPUContext, double>);
ops::BmmKernel<paddle::platform::CPUDeviceContext, double>); REGISTER_OP_CPU_KERNEL(bmm_grad,
REGISTER_OP_CPU_KERNEL( ops::BmmGradKernel<phi::CPUContext, float>,
bmm_grad, ops::BmmGradKernel<phi::CPUContext, double>);
ops::BmmGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::BmmGradKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -176,7 +176,7 @@ class BprLossGradMaker : public framework::SingleGradOpMaker<T> { ...@@ -176,7 +176,7 @@ class BprLossGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext; using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(bpr_loss, REGISTER_OPERATOR(bpr_loss,
ops::BprLossOp, ops::BprLossOp,
......
...@@ -141,7 +141,7 @@ class CastOp : public framework::OperatorWithKernel { ...@@ -141,7 +141,7 @@ class CastOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
// cast use phi kernel, so no need to REGISTER_OP_CPU_KERNEL here. // cast use phi kernel, so no need to REGISTER_OP_CPU_KERNEL here.
REGISTER_OPERATOR(cast, REGISTER_OPERATOR(cast,
......
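Files such as cast_op.cc route every registration below through a local alias, so the migration is a one-line edit per file. Sketch of the idiom (some_op and SomeKernel are placeholders, not names from this commit):

using CPU = phi::CPUContext;  // single point of change for the whole file
REGISTER_OP_CPU_KERNEL(some_op,
                       ops::SomeKernel<CPU, float>,
                       ops::SomeKernel<CPU, double>);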
...@@ -146,7 +146,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(CenterLossGradNoNeedBufVarsInferer, "X"); ...@@ -146,7 +146,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(CenterLossGradNoNeedBufVarsInferer, "X");
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext; using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(center_loss, REGISTER_OPERATOR(center_loss,
ops::CenterLossOp, ops::CenterLossOp,
......
...@@ -113,13 +113,11 @@ It accomplishes the execution of the instruction according to the following step ...@@ -113,13 +113,11 @@ It accomplishes the execution of the instruction according to the following step
} // namespace paddle::operators } // namespace paddle::operators
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPUDeviceContext = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
cinn_instruction_run, cinn_instruction_run,
ops::CinnInstructionRunOp, ops::CinnInstructionRunOp,
ops::CinnInstructionRunOpMaker, ops::CinnInstructionRunOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(cinn_instruction_run,
cinn_instruction_run, ops::CinnInstructionRunOpKernel<phi::CPUContext, float>);
ops::CinnInstructionRunOpKernel<CPUDeviceContext, float>);
...@@ -189,6 +189,5 @@ REGISTER_OPERATOR( ...@@ -189,6 +189,5 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
/* see [Why use single type kernel] */ /* see [Why use single type kernel] */
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(cinn_launch,
cinn_launch, ops::CinnLaunchOpKernel<phi::CPUContext, float>);
ops::CinnLaunchOpKernel<paddle::platform::CPUDeviceContext, float>);
...@@ -19,6 +19,5 @@ REGISTER_OP_WITHOUT_GRADIENT(clip_by_norm, ...@@ -19,6 +19,5 @@ REGISTER_OP_WITHOUT_GRADIENT(clip_by_norm,
ops::ClipByNormOp, ops::ClipByNormOp,
ops::ClipByNormOpMaker); ops::ClipByNormOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(clip_by_norm,
clip_by_norm, ops::ClipByNormKernel<phi::CPUContext, float>);
ops::ClipByNormKernel<paddle::platform::CPUDeviceContext, float>);
...@@ -511,11 +511,10 @@ REGISTER_OPERATOR(coalesce_tensor, ...@@ -511,11 +511,10 @@ REGISTER_OPERATOR(coalesce_tensor,
paddle::operators::CoalesceTensorOpMaker); paddle::operators::CoalesceTensorOpMaker);
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(coalesce_tensor,
coalesce_tensor, ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>, ops::CoalesceTensorOpKernel<phi::CPUContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>, ops::CoalesceTensorOpKernel<phi::CPUContext, double>);
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
...@@ -550,20 +549,18 @@ REGISTER_OP_XPU_KERNEL( ...@@ -550,20 +549,18 @@ REGISTER_OP_XPU_KERNEL(
#if defined(PADDLE_WITH_ASCEND_CL) #if defined(PADDLE_WITH_ASCEND_CL)
REGISTER_OP_NPU_KERNEL( REGISTER_OP_NPU_KERNEL(
coalesce_tensor, coalesce_tensor,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>, ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>, ops::CoalesceTensorOpKernel<phi::CPUContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, ops::CoalesceTensorOpKernel<phi::CPUContext, plat::float16>,
plat::float16>, ops::CoalesceTensorOpKernel<phi::CPUContext, double>);
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
#endif #endif
#if defined(PADDLE_WITH_MLU) #if defined(PADDLE_WITH_MLU)
REGISTER_OP_MLU_KERNEL( REGISTER_OP_MLU_KERNEL(
coalesce_tensor, coalesce_tensor,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, ops::CoalesceTensorOpKernel<phi::CPUContext, plat::float16>,
plat::float16>, ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>, ops::CoalesceTensorOpKernel<phi::CPUContext, float>);
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>);
#endif #endif
REGISTER_OP_VERSION(coalesce_tensor) REGISTER_OP_VERSION(coalesce_tensor)
......
...@@ -73,10 +73,9 @@ REGISTER_OP_WITHOUT_GRADIENT(allreduce, ...@@ -73,10 +73,9 @@ REGISTER_OP_WITHOUT_GRADIENT(allreduce,
ops::AllReduceOp, ops::AllReduceOp,
ops::AllReduceOpMaker); ops::AllReduceOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(allreduce,
allreduce, ops::AllReduceOpKernel<phi::CPUContext, float>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, float>, ops::AllReduceOpKernel<phi::CPUContext, double>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, double>, ops::AllReduceOpKernel<phi::CPUContext, int>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, int>, ops::AllReduceOpKernel<phi::CPUContext, int64_t>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, int64_t>, ops::AllReduceOpKernel<phi::CPUContext, plat::float16>);
ops::AllReduceOpKernel<plat::CPUDeviceContext, plat::float16>);
...@@ -143,12 +143,10 @@ REGISTER_OPERATOR(complex, ...@@ -143,12 +143,10 @@ REGISTER_OPERATOR(complex,
REGISTER_OPERATOR(complex_grad, ops::ComplexGradOp); REGISTER_OPERATOR(complex_grad, ops::ComplexGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(complex,
complex, ops::ComplexKernel<phi::CPUContext, float>,
ops::ComplexKernel<paddle::platform::CPUDeviceContext, float>, ops::ComplexKernel<phi::CPUContext, double>);
ops::ComplexKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(complex_grad,
REGISTER_OP_CPU_KERNEL( ops::ComplexGradKernel<phi::CPUContext, float>,
complex_grad, ops::ComplexGradKernel<phi::CPUContext, double>);
ops::ComplexGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ComplexGradKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -161,12 +161,10 @@ REGISTER_OPERATOR(as_real, ...@@ -161,12 +161,10 @@ REGISTER_OPERATOR(as_real,
ops::AsRealGradMaker<paddle::framework::OpDesc>, ops::AsRealGradMaker<paddle::framework::OpDesc>,
ops::AsRealGradMaker<paddle::imperative::OpBase>); ops::AsRealGradMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(as_complex,
as_complex, ops::AsComplexKernel<phi::CPUContext, float>,
ops::AsComplexKernel<paddle::platform::CPUDeviceContext, float>, ops::AsComplexKernel<phi::CPUContext, double>);
ops::AsComplexKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(as_real,
REGISTER_OP_CPU_KERNEL( ops::AsRealKernel<phi::CPUContext, float>,
as_real, ops::AsRealKernel<phi::CPUContext, double>);
ops::AsRealKernel<paddle::platform::CPUDeviceContext, float>,
ops::AsRealKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -249,8 +249,6 @@ REGISTER_OPERATOR(cos_sim, ...@@ -249,8 +249,6 @@ REGISTER_OPERATOR(cos_sim,
ops::CosSimGradOpMaker<paddle::framework::OpDesc>, ops::CosSimGradOpMaker<paddle::framework::OpDesc>,
ops::CosSimGradOpMaker<paddle::imperative::OpBase>); ops::CosSimGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(cos_sim_grad, ops::CosSimOpGrad); REGISTER_OPERATOR(cos_sim_grad, ops::CosSimOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(cos_sim, ops::CosSimKernel<phi::CPUContext, float>);
cos_sim, ops::CosSimKernel<paddle::platform::CPUDeviceContext, float>); REGISTER_OP_CPU_KERNEL(cos_sim_grad,
REGISTER_OP_CPU_KERNEL( ops::CosSimGradKernel<phi::CPUContext, float>);
cos_sim_grad,
ops::CosSimGradKernel<paddle::platform::CPUDeviceContext, float>);
...@@ -215,7 +215,6 @@ namespace ops = paddle::operators; ...@@ -215,7 +215,6 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(crf_decoding, REGISTER_OP_WITHOUT_GRADIENT(crf_decoding,
ops::CRFDecodingOp, ops::CRFDecodingOp,
ops::CRFDecodingOpMaker); ops::CRFDecodingOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(crf_decoding,
crf_decoding, ops::CRFDecodingOpKernel<phi::CPUContext, float>,
ops::CRFDecodingOpKernel<paddle::platform::CPUDeviceContext, float>, ops::CRFDecodingOpKernel<phi::CPUContext, double>);
ops::CRFDecodingOpKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -223,14 +223,12 @@ REGISTER_OPERATOR(crop, ...@@ -223,14 +223,12 @@ REGISTER_OPERATOR(crop,
ops::CropGradOpMaker<paddle::imperative::OpBase>, ops::CropGradOpMaker<paddle::imperative::OpBase>,
ops::GropNoNeedBufferVarInferer); ops::GropNoNeedBufferVarInferer);
REGISTER_OPERATOR(crop_grad, ops::CropOpGrad); REGISTER_OPERATOR(crop_grad, ops::CropOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(crop,
crop, ops::CropKernel<phi::CPUContext, float>,
ops::CropKernel<paddle::platform::CPUDeviceContext, float>, ops::CropKernel<phi::CPUContext, double>);
ops::CropKernel<paddle::platform::CPUDeviceContext, double>); REGISTER_OP_CPU_KERNEL(crop_grad,
REGISTER_OP_CPU_KERNEL( ops::CropGradKernel<phi::CPUContext, float>,
crop_grad, ops::CropGradKernel<phi::CPUContext, double>);
ops::CropGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
crop, crop,
......
...@@ -320,18 +320,16 @@ REGISTER_OPERATOR(crop_tensor, ...@@ -320,18 +320,16 @@ REGISTER_OPERATOR(crop_tensor,
ops::CropTensorGradOpMaker<paddle::framework::OpDesc>, ops::CropTensorGradOpMaker<paddle::framework::OpDesc>,
ops::CropTensorGradOpMaker<paddle::imperative::OpBase>); ops::CropTensorGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(crop_tensor_grad, ops::CropTensorOpGrad); REGISTER_OPERATOR(crop_tensor_grad, ops::CropTensorOpGrad);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(crop_tensor,
crop_tensor, ops::CropTensorKernel<phi::CPUContext, float>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, float>, ops::CropTensorKernel<phi::CPUContext, double>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, double>, ops::CropTensorKernel<phi::CPUContext, int>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, int>, ops::CropTensorKernel<phi::CPUContext, int64_t>);
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, int64_t>); REGISTER_OP_CPU_KERNEL(crop_tensor_grad,
REGISTER_OP_CPU_KERNEL( ops::CropTensorGradKernel<phi::CPUContext, float>,
crop_tensor_grad, ops::CropTensorGradKernel<phi::CPUContext, double>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, float>, ops::CropTensorGradKernel<phi::CPUContext, int>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, double>, ops::CropTensorGradKernel<phi::CPUContext, int64_t>);
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
crop_tensor, crop_tensor,
......
...@@ -421,7 +421,7 @@ class CrossEntropyGradOpMaker2 : public framework::SingleGradOpMaker<T> { ...@@ -421,7 +421,7 @@ class CrossEntropyGradOpMaker2 : public framework::SingleGradOpMaker<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext; using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(cross_entropy, REGISTER_OPERATOR(cross_entropy,
ops::CrossEntropyOpBase, ops::CrossEntropyOpBase,
......
...@@ -129,7 +129,6 @@ REGISTER_OPERATOR( ...@@ -129,7 +129,6 @@ REGISTER_OPERATOR(
ops::CTCAlignOpMaker, ops::CTCAlignOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(ctc_align,
ctc_align, ops::CTCAlignKernel<phi::CPUContext, int>,
ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int>, ops::CTCAlignKernel<phi::CPUContext, int64_t>);
ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -145,7 +145,7 @@ class LogcumsumexpGradMaker : public framework::SingleGradOpMaker<T> { ...@@ -145,7 +145,7 @@ class LogcumsumexpGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
DECLARE_INFER_SHAPE_FUNCTOR(cumsum, DECLARE_INFER_SHAPE_FUNCTOR(cumsum,
CumsumInferShapeFunctor, CumsumInferShapeFunctor,
PD_INFER_META(phi::CumInferMeta)); PD_INFER_META(phi::CumInferMeta));
......
...@@ -287,8 +287,7 @@ The required data format for this layer is one of the following: ...@@ -287,8 +287,7 @@ The required data format for this layer is one of the following:
}; };
template <typename T> template <typename T>
class DataNormKernel<platform::CPUDeviceContext, T> class DataNormKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
: public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
// const bool is_test = ctx.Attr<bool>("is_test"); // const bool is_test = ctx.Attr<bool>("is_test");
...@@ -533,8 +532,7 @@ class DataNormGradOp : public framework::OperatorWithKernel { ...@@ -533,8 +532,7 @@ class DataNormGradOp : public framework::OperatorWithKernel {
}; };
template <typename T> template <typename T>
class DataNormGradKernel<platform::CPUDeviceContext, T> class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
: public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
const auto *x = ctx.Input<Tensor>("X"); const auto *x = ctx.Input<Tensor>("X");
...@@ -788,14 +786,12 @@ REGISTER_OPERATOR(data_norm, ...@@ -788,14 +786,12 @@ REGISTER_OPERATOR(data_norm,
ops::DataNormGradMaker<paddle::imperative::OpBase>); ops::DataNormGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(data_norm_grad, ops::DataNormGradOp); REGISTER_OPERATOR(data_norm_grad, ops::DataNormGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(data_norm,
data_norm, ops::DataNormKernel<phi::CPUContext, float>,
ops::DataNormKernel<paddle::platform::CPUDeviceContext, float>, ops::DataNormKernel<phi::CPUContext, double>);
ops::DataNormKernel<paddle::platform::CPUDeviceContext, double>); REGISTER_OP_CPU_KERNEL(data_norm_grad,
REGISTER_OP_CPU_KERNEL( ops::DataNormGradKernel<phi::CPUContext, float>,
data_norm_grad, ops::DataNormGradKernel<phi::CPUContext, double>);
ops::DataNormGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::DataNormGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_VERSION(data_norm).AddCheckpoint( REGISTER_OP_VERSION(data_norm).AddCheckpoint(
R"ROC( R"ROC(
upgrade data_norm op by adding scale_w to support scale and shift.)ROC", upgrade data_norm op by adding scale_w to support scale and shift.)ROC",
......
...@@ -349,7 +349,7 @@ class DeformablePSROIPoolGradOp : public framework::OperatorWithKernel { ...@@ -349,7 +349,7 @@ class DeformablePSROIPoolGradOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
deformable_psroi_pooling, deformable_psroi_pooling,
ops::DeformablePSROIPoolOp, ops::DeformablePSROIPoolOp,
......
...@@ -33,8 +33,8 @@ namespace paddle { ...@@ -33,8 +33,8 @@ namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> { struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx, void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in, const framework::Tensor* in,
const framework::Tensor* scale, const framework::Tensor* scale,
float max_range, float max_range,
...@@ -49,8 +49,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> { ...@@ -49,8 +49,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct DequantizeFunctor<platform::CPUDeviceContext, int8_t>; template struct DequantizeFunctor<phi::CPUContext, int8_t>;
template struct DequantizeFunctor<platform::CPUDeviceContext, int16_t>; template struct DequantizeFunctor<phi::CPUContext, int16_t>;
class DequantizeMaxAbsOp : public framework::OperatorWithKernel { class DequantizeMaxAbsOp : public framework::OperatorWithKernel {
public: public:
...@@ -102,7 +102,7 @@ $$Out = \frac{scale*X}{ max\_range }$$ ...@@ -102,7 +102,7 @@ $$Out = \frac{scale*X}{ max\_range }$$
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
dequantize_abs_max, dequantize_abs_max,
......
...@@ -32,8 +32,8 @@ namespace paddle { ...@@ -32,8 +32,8 @@ namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> { struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx, void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in, const framework::Tensor* in,
const framework::Tensor* dict, const framework::Tensor* dict,
framework::Tensor* out) { framework::Tensor* out) {
...@@ -51,7 +51,7 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> { ...@@ -51,7 +51,7 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct DequantizeFunctor<platform::CPUDeviceContext, int8_t>; template struct DequantizeFunctor<phi::CPUContext, int8_t>;
class DequantizeLogOp : public framework::OperatorWithKernel { class DequantizeLogOp : public framework::OperatorWithKernel {
public: public:
...@@ -108,7 +108,7 @@ This calculation is an opposite operation of QuantizeLogOp: ...@@ -108,7 +108,7 @@ This calculation is an opposite operation of QuantizeLogOp:
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
dequantize_log, dequantize_log,
......
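Both dequantize files above pair a partial specialization of DequantizeFunctor for the CPU context with explicit instantiations, keeping the definition in the .cc file while other translation units link against it. A self-contained illustration of the pattern (stand-in types and a simplified formula, not the ops' real math):

#include <cstdint>

struct CPUContext {};

template <typename Context, typename T>
struct DequantizeFunctor;  // primary template, defined per context type

template <typename T>
struct DequantizeFunctor<CPUContext, T> {  // CPU partial specialization
  float operator()(T in, float scale, float max_range) const {
    return scale * static_cast<float>(in) / max_range;
  }
};

// Explicit instantiations emitted into this translation unit:
template struct DequantizeFunctor<CPUContext, std::int8_t>;
template struct DequantizeFunctor<CPUContext, std::int16_t>;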
...@@ -200,7 +200,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -200,7 +200,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
auto* match_indices = context.Output<Tensor>("ColToRowMatchIndices"); auto* match_indices = context.Output<Tensor>("ColToRowMatchIndices");
auto* match_dist = context.Output<Tensor>("ColToRowMatchDist"); auto* match_dist = context.Output<Tensor>("ColToRowMatchDist");
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = context.device_context<phi::CPUContext>();
auto col = dist_mat->dims()[1]; auto col = dist_mat->dims()[1];
...@@ -216,9 +216,9 @@ class BipartiteMatchKernel : public framework::OpKernel<T> { ...@@ -216,9 +216,9 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
match_indices->mutable_data<int>({n, col}, context.GetPlace()); match_indices->mutable_data<int>({n, col}, context.GetPlace());
match_dist->mutable_data<T>({n, col}, context.GetPlace()); match_dist->mutable_data<T>({n, col}, context.GetPlace());
phi::funcs::SetConstant<platform::CPUDeviceContext, int> iset; phi::funcs::SetConstant<phi::CPUContext, int> iset;
iset(dev_ctx, match_indices, static_cast<int>(-1)); iset(dev_ctx, match_indices, static_cast<int>(-1));
phi::funcs::SetConstant<platform::CPUDeviceContext, T> tset; phi::funcs::SetConstant<phi::CPUContext, T> tset;
tset(dev_ctx, match_dist, static_cast<T>(0)); tset(dev_ctx, match_dist, static_cast<T>(0));
int* indices = match_indices->data<int>(); int* indices = match_indices->data<int>();
......
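The SetConstant functor used in the bipartite_match hunk is stateless: instantiate it for a context/dtype pair, then call it with the context, the target tensor, and the fill value. Call shape only (assumes Paddle's phi headers and an existing dev_ctx and tensor pointer):

phi::funcs::SetConstant<phi::CPUContext, int> iset;
iset(dev_ctx, match_indices, static_cast<int>(-1));  // fill with -1 on CPU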
...@@ -104,7 +104,6 @@ REGISTER_OPERATOR( ...@@ -104,7 +104,6 @@ REGISTER_OPERATOR(
ops::BoxClipOpMaker, ops::BoxClipOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(box_clip,
box_clip, ops::BoxClipKernel<phi::CPUContext, float>,
ops::BoxClipKernel<paddle::platform::CPUDeviceContext, float>, ops::BoxClipKernel<phi::CPUContext, double>);
ops::BoxClipKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -29,8 +29,7 @@ class BoxClipKernel : public framework::OpKernel<T> { ...@@ -29,8 +29,7 @@ class BoxClipKernel : public framework::OpKernel<T> {
auto* input_box = context.Input<LoDTensor>("Input"); auto* input_box = context.Input<LoDTensor>("Input");
auto* im_info = context.Input<LoDTensor>("ImInfo"); auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* output_box = context.Output<LoDTensor>("Output"); auto* output_box = context.Output<LoDTensor>("Output");
auto& dev_ctx = auto& dev_ctx = context.template device_context<phi::CPUContext>();
context.template device_context<platform::CPUDeviceContext>();
output_box->mutable_data<T>(context.GetPlace()); output_box->mutable_data<T>(context.GetPlace());
if (input_box->lod().size()) { if (input_box->lod().size()) {
PADDLE_ENFORCE_EQ(input_box->lod().size(), PADDLE_ENFORCE_EQ(input_box->lod().size(),
......
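Two spellings of the same accessor appear in these hunks: context.device_context<phi::CPUContext>() in ordinary code and context.template device_context<phi::CPUContext>() inside class templates. The .template keyword is required when calling a member template through a dependent expression; a self-contained illustration:

struct FakeContext {};

struct FakeExecutionContext {
  template <typename DeviceContext>
  DeviceContext& device_context() const {
    static DeviceContext ctx;
    return ctx;
  }
};

template <typename DeviceContext>
void Compute(const FakeExecutionContext& ec) {
  // Without ".template" the "<" would parse as a less-than comparison,
  // because device_context is a dependent name here.
  auto& dev_ctx = ec.template device_context<DeviceContext>();
  (void)dev_ctx;
}

int main() {
  FakeExecutionContext ec;
  Compute<FakeContext>(ec);
}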
...@@ -251,7 +251,6 @@ REGISTER_OPERATOR( ...@@ -251,7 +251,6 @@ REGISTER_OPERATOR(
ops::BoxCoderOpMaker, ops::BoxCoderOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(box_coder,
box_coder, ops::BoxCoderKernel<phi::CPUContext, float>,
ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, float>, ops::BoxCoderKernel<phi::CPUContext, double>);
ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -227,7 +227,6 @@ REGISTER_OPERATOR( ...@@ -227,7 +227,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(box_decoder_and_assign,
box_decoder_and_assign, ops::BoxDecoderAndAssignKernel<phi::CPUContext, float>,
ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, float>, ops::BoxDecoderAndAssignKernel<phi::CPUContext, double>);
ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -122,7 +122,7 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel { ...@@ -122,7 +122,7 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel {
* to encode class specific mask targets. * to encode class specific mask targets.
*/ */
template <typename T> template <typename T>
static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx, static inline void ExpandMaskTarget(const phi::CPUContext& ctx,
const Tensor& masks, const Tensor& masks,
const Tensor& mask_class_labels, const Tensor& mask_class_labels,
const int resolution, const int resolution,
...@@ -150,7 +150,7 @@ static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx, ...@@ -150,7 +150,7 @@ static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx,
} }
template <typename T> template <typename T>
std::vector<Tensor> SampleMaskForOneImage(const platform::CPUDeviceContext& ctx, std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
const Tensor& im_info, const Tensor& im_info,
const Tensor& gt_classes, const Tensor& gt_classes,
const Tensor& is_crowd, const Tensor& is_crowd,
...@@ -391,7 +391,7 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> { ...@@ -391,7 +391,7 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
std::vector<size_t> lod0(1, 0); std::vector<size_t> lod0(1, 0);
int64_t num_mask = 0; int64_t num_mask = 0;
auto& dev_ctx = ctx.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.device_context<phi::CPUContext>();
auto gt_classes_lod = gt_classes->lod().back(); auto gt_classes_lod = gt_classes->lod().back();
auto is_crowd_lod = is_crowd->lod().back(); auto is_crowd_lod = is_crowd->lod().back();
......
...@@ -168,7 +168,7 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { ...@@ -168,7 +168,7 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
}; };
template <typename T> template <typename T>
void Concat(const platform::CPUDeviceContext& context, void Concat(const phi::CPUContext& context,
const Tensor& in_tensor_a, const Tensor& in_tensor_a,
const Tensor& in_tensor_b, const Tensor& in_tensor_b,
Tensor* out_tensor) { Tensor* out_tensor) {
...@@ -176,24 +176,23 @@ void Concat(const platform::CPUDeviceContext& context, ...@@ -176,24 +176,23 @@ void Concat(const platform::CPUDeviceContext& context,
std::vector<Tensor> inputs; std::vector<Tensor> inputs;
inputs.emplace_back(in_tensor_a); inputs.emplace_back(in_tensor_a);
inputs.emplace_back(in_tensor_b); inputs.emplace_back(in_tensor_b);
math::ConcatFunctor<platform::CPUDeviceContext, T> concat_functor; math::ConcatFunctor<phi::CPUContext, T> concat_functor;
concat_functor(context, inputs, axis, out_tensor); concat_functor(context, inputs, axis, out_tensor);
} }
template <typename T> template <typename T>
std::vector<std::vector<int>> SampleFgBgGt( std::vector<std::vector<int>> SampleFgBgGt(const phi::CPUContext& context,
const platform::CPUDeviceContext& context, Tensor* iou,
Tensor* iou, const Tensor& is_crowd,
const Tensor& is_crowd, const int batch_size_per_im,
const int batch_size_per_im, const float fg_fraction,
const float fg_fraction, const float fg_thresh,
const float fg_thresh, const float bg_thresh_hi,
const float bg_thresh_hi, const float bg_thresh_lo,
const float bg_thresh_lo, std::minstd_rand engine,
std::minstd_rand engine, const bool use_random,
const bool use_random, const bool is_cascade_rcnn,
const bool is_cascade_rcnn, const Tensor& rpn_rois) {
const Tensor& rpn_rois) {
std::vector<int> fg_inds; std::vector<int> fg_inds;
std::vector<int> bg_inds; std::vector<int> bg_inds;
std::vector<int> mapped_gt_inds; std::vector<int> mapped_gt_inds;
...@@ -286,7 +285,7 @@ std::vector<std::vector<int>> SampleFgBgGt( ...@@ -286,7 +285,7 @@ std::vector<std::vector<int>> SampleFgBgGt(
} }
template <typename T> template <typename T>
void GatherBoxesLabels(const platform::CPUDeviceContext& context, void GatherBoxesLabels(const phi::CPUContext& context,
const Tensor& boxes, const Tensor& boxes,
const Tensor& max_overlap, const Tensor& max_overlap,
const Tensor& gt_boxes, const Tensor& gt_boxes,
...@@ -335,7 +334,7 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context, ...@@ -335,7 +334,7 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
template <typename T> template <typename T>
std::vector<Tensor> SampleRoisForOneImage( std::vector<Tensor> SampleRoisForOneImage(
const platform::CPUDeviceContext& context, const phi::CPUContext& context,
const Tensor& rpn_rois_in, const Tensor& rpn_rois_in,
const Tensor& gt_classes, const Tensor& gt_classes,
const Tensor& is_crowd, const Tensor& is_crowd,
...@@ -372,7 +371,7 @@ std::vector<Tensor> SampleRoisForOneImage( ...@@ -372,7 +371,7 @@ std::vector<Tensor> SampleRoisForOneImage(
Tensor roi_filter; Tensor roi_filter;
// Tensor box_filter; // Tensor box_filter;
if (keep.numel() == 0) { if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero; phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace()); roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
set_zero(context, &roi_filter, static_cast<T>(0)); set_zero(context, &roi_filter, static_cast<T>(0));
} else { } else {
...@@ -597,7 +596,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> { ...@@ -597,7 +596,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::vector<size_t> lod0(1, 0); std::vector<size_t> lod0(1, 0);
int64_t num_rois = 0; int64_t num_rois = 0;
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = context.device_context<phi::CPUContext>();
auto rpn_rois_lod = rpn_rois->lod().back(); auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back(); auto gt_classes_lod = gt_classes->lod().back();
......
...@@ -98,8 +98,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -98,8 +98,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
float min_size = context.Attr<float>("min_size"); float min_size = context.Attr<float>("min_size");
float eta = context.Attr<float>("eta"); float eta = context.Attr<float>("eta");
auto &dev_ctx = auto &dev_ctx = context.template device_context<phi::CPUContext>();
context.template device_context<platform::CPUDeviceContext>();
auto &scores_dim = scores->dims(); auto &scores_dim = scores->dims();
int64_t num = scores_dim[0]; int64_t num = scores_dim[0];
...@@ -122,7 +121,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -122,7 +121,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
scores_swap.mutable_data<T>({num, h_score, w_score, c_score}, scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
dev_ctx.GetPlace()); dev_ctx.GetPlace());
phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans; phi::funcs::Transpose<phi::CPUContext, T, 4> trans;
std::vector<int> axis = {0, 2, 3, 1}; std::vector<int> axis = {0, 2, 3, 1};
trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
trans(dev_ctx, *scores, &scores_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis);
...@@ -181,7 +180,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -181,7 +180,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
} }
std::pair<Tensor, Tensor> ProposalForOneImage( std::pair<Tensor, Tensor> ProposalForOneImage(
const platform::CPUDeviceContext &ctx, const phi::CPUContext &ctx,
const Tensor &im_info_slice, const Tensor &im_info_slice,
const Tensor &anchors, const Tensor &anchors,
const Tensor &variances, const Tensor &variances,
...@@ -234,7 +233,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> { ...@@ -234,7 +233,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep); FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep);
// Handle the case when there is no keep index left // Handle the case when there is no keep index left
if (keep.numel() == 0) { if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero; phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace()); bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0)); set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter; Tensor scores_filter;
......
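phi::funcs::Transpose is templated on context, element type, and rank; the generate_proposals hunks use the rank-4 form to reorder score and delta tensors from NCHW to NHWC before processing. Call shape only (assumes Paddle headers and existing dev_ctx and tensors):

phi::funcs::Transpose<phi::CPUContext, float, 4> trans;
std::vector<int> axis = {0, 2, 3, 1};      // NCHW -> NHWC
trans(dev_ctx, input, &transposed, axis);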
...@@ -99,8 +99,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> { ...@@ -99,8 +99,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
float eta = context.Attr<float>("eta"); float eta = context.Attr<float>("eta");
bool pixel_offset = context.Attr<bool>("pixel_offset"); bool pixel_offset = context.Attr<bool>("pixel_offset");
auto &dev_ctx = auto &dev_ctx = context.template device_context<phi::CPUContext>();
context.template device_context<platform::CPUDeviceContext>();
auto &scores_dim = scores->dims(); auto &scores_dim = scores->dims();
int64_t num = scores_dim[0]; int64_t num = scores_dim[0];
...@@ -123,7 +122,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> { ...@@ -123,7 +122,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
scores_swap.mutable_data<T>({num, h_score, w_score, c_score}, scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
dev_ctx.GetPlace()); dev_ctx.GetPlace());
phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans; phi::funcs::Transpose<phi::CPUContext, T, 4> trans;
std::vector<int> axis = {0, 2, 3, 1}; std::vector<int> axis = {0, 2, 3, 1};
trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis); trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
trans(dev_ctx, *scores, &scores_swap, axis); trans(dev_ctx, *scores, &scores_swap, axis);
...@@ -183,7 +182,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> { ...@@ -183,7 +182,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
} }
std::pair<Tensor, Tensor> ProposalForOneImage( std::pair<Tensor, Tensor> ProposalForOneImage(
const platform::CPUDeviceContext &ctx, const phi::CPUContext &ctx,
const Tensor &im_shape_slice, const Tensor &im_shape_slice,
const Tensor &anchors, const Tensor &anchors,
const Tensor &variances, const Tensor &variances,
...@@ -240,7 +239,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> { ...@@ -240,7 +239,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
ctx, &proposals, min_size, im_shape_slice, false, &keep, pixel_offset); ctx, &proposals, min_size, im_shape_slice, false, &keep, pixel_offset);
// Handle the case when there is no keep index left // Handle the case when there is no keep index left
if (keep.numel() == 0) { if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero; phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace()); bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0)); set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter; Tensor scores_filter;
......
...@@ -113,7 +113,6 @@ REGISTER_OPERATOR( ...@@ -113,7 +113,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(iou_similarity,
iou_similarity, ops::IOUSimilarityKernel<phi::CPUContext, float>,
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, float>, ops::IOUSimilarityKernel<phi::CPUContext, double>);
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -356,7 +356,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> { ...@@ -356,7 +356,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
auto* outs = ctx.Output<LoDTensor>("Out"); auto* outs = ctx.Output<LoDTensor>("Out");
auto& score_dims = scores_input->dims(); auto& score_dims = scores_input->dims();
auto score_size = score_dims.size(); auto score_size = score_dims.size();
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
LoDTensor scores; LoDTensor scores;
LoDTensor boxes; LoDTensor boxes;
......
...@@ -403,7 +403,6 @@ REGISTER_OPERATOR( ...@@ -403,7 +403,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(mine_hard_examples,
mine_hard_examples, ops::MineHardExamplesKernel<phi::CPUContext, float>,
ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>, ops::MineHardExamplesKernel<phi::CPUContext, double>);
ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);
...@@ -219,7 +219,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -219,7 +219,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
T nms_threshold = static_cast<T>(ctx.Attr<float>("nms_threshold")); T nms_threshold = static_cast<T>(ctx.Attr<float>("nms_threshold"));
T nms_eta = static_cast<T>(ctx.Attr<float>("nms_eta")); T nms_eta = static_cast<T>(ctx.Attr<float>("nms_eta"));
T score_threshold = static_cast<T>(ctx.Attr<float>("score_threshold")); T score_threshold = static_cast<T>(ctx.Attr<float>("score_threshold"));
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
int num_det = 0; int num_det = 0;
...@@ -361,7 +361,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> { ...@@ -361,7 +361,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
auto rois_num = ctx.Input<Tensor>("RoisNum"); auto rois_num = ctx.Input<Tensor>("RoisNum");
auto score_dims = scores->dims(); auto score_dims = scores->dims();
auto score_size = score_dims.size(); auto score_size = score_dims.size();
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
std::vector<std::map<int, std::vector<int>>> all_indices; std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<size_t> batch_starts = {0}; std::vector<size_t> batch_starts = {0};
......
...@@ -507,7 +507,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> { ...@@ -507,7 +507,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
int64_t box_dim = box_dims[2]; int64_t box_dim = box_dims[2];
int64_t out_dim = box_dim + 2; int64_t out_dim = box_dim + 2;
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
std::vector<std::vector<std::vector<T>>> all_nmsed_out; std::vector<std::vector<std::vector<T>>> all_nmsed_out;
std::vector<size_t> batch_starts = {0}; std::vector<size_t> batch_starts = {0};
......
...@@ -112,12 +112,11 @@ void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) { ...@@ -112,12 +112,11 @@ void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) {
} }
template <typename T> template <typename T>
std::vector<Tensor> FilterStraddleAnchor( std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context,
const platform::CPUDeviceContext& context, const Tensor* anchor,
const Tensor* anchor, const float rpn_straddle_thresh,
const float rpn_straddle_thresh, T im_height,
T im_height, T im_width) {
T im_width) {
std::vector<int> inds_inside; std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0]; int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>(); auto* anchor_data = anchor->data<T>();
...@@ -154,7 +153,7 @@ std::vector<Tensor> FilterStraddleAnchor( ...@@ -154,7 +153,7 @@ std::vector<Tensor> FilterStraddleAnchor(
} }
template <typename T> template <typename T>
Tensor FilterCrowdGt(const platform::CPUDeviceContext& context, Tensor FilterCrowdGt(const phi::CPUContext& context,
Tensor* gt_boxes, Tensor* gt_boxes,
Tensor* is_crowd) { Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0]; int gt_num = gt_boxes->dims()[0];
...@@ -300,7 +299,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, ...@@ -300,7 +299,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
} }
template <typename T> template <typename T>
std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx, std::vector<Tensor> SampleRpnFgBgGt(const phi::CPUContext& ctx,
const Tensor& anchor_by_gt_overlap, const Tensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im, const int rpn_batch_size_per_im,
const float rpn_positive_overlap, const float rpn_positive_overlap,
...@@ -437,7 +436,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> { ...@@ -437,7 +436,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
tgt_bbox->mutable_data<T>({max_num, 4}, place); tgt_bbox->mutable_data<T>({max_num, 4}, place);
tgt_lbl->mutable_data<int>({max_num, 1}, place); tgt_lbl->mutable_data<int>({max_num, 1}, place);
bbox_inside_weight->mutable_data<T>({max_num, 4}, place); bbox_inside_weight->mutable_data<T>({max_num, 4}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = context.device_context<phi::CPUContext>();
std::random_device rnd; std::random_device rnd;
std::minstd_rand engine; std::minstd_rand engine;
...@@ -857,11 +856,10 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { ...@@ -857,11 +856,10 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel {
}; };
template <typename T> template <typename T>
std::vector<Tensor> FilterCrowdGtBoxLabel( std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
const platform::CPUDeviceContext& context, Tensor* gt_boxes,
Tensor* gt_boxes, Tensor* gt_labels,
Tensor* gt_labels, Tensor* is_crowd) {
Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0]; int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds; std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>(); auto* is_crowd_data = is_crowd->data<int>();
...@@ -893,7 +891,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel( ...@@ -893,7 +891,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(
} }
template <typename T> template <typename T>
std::vector<Tensor> GetAllFgBgGt(const platform::CPUDeviceContext& ctx, std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
const Tensor& anchor_by_gt_overlap, const Tensor& anchor_by_gt_overlap,
const Tensor& ncrowd_gt_labels, const Tensor& ncrowd_gt_labels,
const float positive_overlap, const float positive_overlap,
...@@ -1044,7 +1042,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> { ...@@ -1044,7 +1042,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
tgt_lbl->mutable_data<int>({max_num, 1}, place); tgt_lbl->mutable_data<int>({max_num, 1}, place);
bbox_inside_weight->mutable_data<T>({max_num, 4}, place); bbox_inside_weight->mutable_data<T>({max_num, 4}, place);
fg_num->mutable_data<int>({batch_num, 1}, place); fg_num->mutable_data<int>({batch_num, 1}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = context.device_context<phi::CPUContext>();
std::random_device rnd; std::random_device rnd;
std::minstd_rand engine; std::minstd_rand engine;
......
...@@ -266,12 +266,10 @@ REGISTER_OPERATOR(sigmoid_focal_loss, ...@@ -266,12 +266,10 @@ REGISTER_OPERATOR(sigmoid_focal_loss,
ops::SigmoidFocalLossGradOpMaker<paddle::framework::OpDesc>, ops::SigmoidFocalLossGradOpMaker<paddle::framework::OpDesc>,
ops::SigmoidFocalLossGradOpMaker<paddle::imperative::OpBase>); ops::SigmoidFocalLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(sigmoid_focal_loss_grad, ops::SigmoidFocalLossGradOp); REGISTER_OPERATOR(sigmoid_focal_loss_grad, ops::SigmoidFocalLossGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(sigmoid_focal_loss,
sigmoid_focal_loss, ops::SigmoidFocalLossKernel<phi::CPUContext, float>,
ops::SigmoidFocalLossKernel<paddle::platform::CPUDeviceContext, float>, ops::SigmoidFocalLossKernel<phi::CPUContext, double>);
ops::SigmoidFocalLossKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
sigmoid_focal_loss_grad, sigmoid_focal_loss_grad,
ops::SigmoidFocalLossGradKernel<paddle::platform::CPUDeviceContext, float>, ops::SigmoidFocalLossGradKernel<phi::CPUContext, float>,
ops::SigmoidFocalLossGradKernel<paddle::platform::CPUDeviceContext, ops::SigmoidFocalLossGradKernel<phi::CPUContext, double>);
double>);
...@@ -149,8 +149,8 @@ for i-th instance and each `id` of NegIndices in this instance: ...@@ -149,8 +149,8 @@ for i-th instance and each `id` of NegIndices in this instance:
}; };
template <typename T, typename WT> template <typename T, typename WT>
struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> { struct NegTargetAssignFunctor<phi::CPUContext, T, WT> {
void operator()(const platform::CPUDeviceContext& ctx, void operator()(const phi::CPUContext& ctx,
const int* neg_indices, const int* neg_indices,
const size_t* lod, const size_t* lod,
const int N, const int N,
...@@ -172,10 +172,8 @@ struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> { ...@@ -172,10 +172,8 @@ struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> {
} }
}; };
template struct NegTargetAssignFunctor<platform::CPUDeviceContext, int, float>; template struct NegTargetAssignFunctor<phi::CPUContext, int, float>;
template struct NegTargetAssignFunctor<platform::CPUDeviceContext, template struct NegTargetAssignFunctor<phi::CPUContext, float, float>;
float,
float>;
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
...@@ -187,7 +185,6 @@ REGISTER_OPERATOR( ...@@ -187,7 +185,6 @@ REGISTER_OPERATOR(
ops::TargetAssignOpMaker, ops::TargetAssignOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(target_assign,
target_assign, ops::TargetAssignKernel<phi::CPUContext, int, float>,
ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, int, float>, ops::TargetAssignKernel<phi::CPUContext, float, float>);
ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, float, float>);
...@@ -179,12 +179,10 @@ REGISTER_OPERATOR(slogdeterminant, ...@@ -179,12 +179,10 @@ REGISTER_OPERATOR(slogdeterminant,
REGISTER_OPERATOR(slogdeterminant_grad, REGISTER_OPERATOR(slogdeterminant_grad,
ops::SlogDeterminantGradOp) // reuse det grad op ops::SlogDeterminantGradOp) // reuse det grad op
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(slogdeterminant,
slogdeterminant, ops::SlogDeterminantKernel<phi::CPUContext, float>,
ops::SlogDeterminantKernel<plat::CPUDeviceContext, float>, ops::SlogDeterminantKernel<phi::CPUContext, double>);
ops::SlogDeterminantKernel<plat::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(slogdeterminant_grad,
REGISTER_OP_CPU_KERNEL( ops::SlogDeterminantGradKernel<phi::CPUContext, float>,
slogdeterminant_grad, ops::SlogDeterminantGradKernel<phi::CPUContext, double>);
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, float>,
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, double>);
...@@ -66,6 +66,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dgc_clip_by_norm, ...@@ -66,6 +66,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dgc_clip_by_norm,
ops::DGCClipByNormOp, ops::DGCClipByNormOp,
ops::DGCClipByNormOpMaker); ops::DGCClipByNormOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(dgc_clip_by_norm,
dgc_clip_by_norm, ops::DGCClipByNormKernel<phi::CPUContext, float>);
ops::DGCClipByNormKernel<paddle::platform::CPUDeviceContext, float>);
...@@ -138,9 +138,8 @@ REGISTER_OPERATOR( ...@@ -138,9 +138,8 @@ REGISTER_OPERATOR(
ops::DiagEmbedOpMaker, ops::DiagEmbedOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(diag_embed,
diag_embed, ops::DiagEmbedKernel<phi::CPUContext, int>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, int>, ops::DiagEmbedKernel<phi::CPUContext, float>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, float>, ops::DiagEmbedKernel<phi::CPUContext, double>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, double>, ops::DiagEmbedKernel<phi::CPUContext, int64_t>);
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -59,9 +59,8 @@ REGISTER_OPERATOR( ...@@ -59,9 +59,8 @@ REGISTER_OPERATOR(
ops::DiagOpMaker, ops::DiagOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(diag,
diag, ops::DiagKernel<phi::CPUContext, int>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, int>, ops::DiagKernel<phi::CPUContext, float>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, float>, ops::DiagKernel<phi::CPUContext, double>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, double>, ops::DiagKernel<phi::CPUContext, int64_t>);
ops::DiagKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -42,11 +42,11 @@ struct GammaCPUFunctor { ...@@ -42,11 +42,11 @@ struct GammaCPUFunctor {
}; };
template <typename T> template <typename T>
struct DirichletSampler<platform::CPUDeviceContext, T> { struct DirichletSampler<phi::CPUContext, T> {
void operator()(const framework::ExecutionContext& ctx, void operator()(const framework::ExecutionContext& ctx,
const Tensor* alpha, const Tensor* alpha,
Tensor* out) { Tensor* out) {
auto& dev_ctx = ctx.device_context<platform::CPUDeviceContext>(); auto& dev_ctx = ctx.device_context<phi::CPUContext>();
auto p_gen = framework::DefaultCPUGenerator(); auto p_gen = framework::DefaultCPUGenerator();
auto generator = p_gen->GetCPUEngine(); auto generator = p_gen->GetCPUEngine();
...@@ -71,8 +71,7 @@ struct DirichletSampler<platform::CPUDeviceContext, T> { ...@@ -71,8 +71,7 @@ struct DirichletSampler<platform::CPUDeviceContext, T> {
gamma_samples.data<T>(), gamma_samples.data<T>(),
standard_uniform, standard_uniform,
standard_normal); standard_normal);
platform::ForRange<platform::CPUDeviceContext> for_range(dev_ctx, platform::ForRange<phi::CPUContext> for_range(dev_ctx, alpha->numel());
alpha->numel());
for_range(gamma_functor); for_range(gamma_functor);
// normalize them into a simplex, along the last axis // normalize them into a simplex, along the last axis
...@@ -81,10 +80,10 @@ struct DirichletSampler<platform::CPUDeviceContext, T> { ...@@ -81,10 +80,10 @@ struct DirichletSampler<platform::CPUDeviceContext, T> {
new_shape[new_shape.size() - 1] = 1; new_shape[new_shape.size() - 1] = 1;
gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace()); gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
ReduceKernelFunctor<platform::CPUDeviceContext, T, SumFunctor>( ReduceKernelFunctor<phi::CPUContext, T, SumFunctor>(
&gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx) &gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
.template apply<T>(); .template apply<T>();
ElementwiseComputeEx<DivFunctor<T>, platform::CPUDeviceContext, T, T>( ElementwiseComputeEx<DivFunctor<T>, phi::CPUContext, T, T>(
ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out); ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
} }
}; };
...@@ -125,7 +124,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dirichlet, ...@@ -125,7 +124,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dirichlet,
paddle::operators::DirichletOpMaker); paddle::operators::DirichletOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
dirichlet, dirichlet,
paddle::operators::DirichletKernel<paddle::platform::CPUDeviceContext, paddle::operators::DirichletKernel<phi::CPUContext, float>,
float>, paddle::operators::DirichletKernel<phi::CPUContext, double>);
paddle::operators::DirichletKernel<paddle::platform::CPUDeviceContext,
double>);
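platform::ForRange, as used by the dirichlet sampler above, applies a functor to every index in [0, n); instantiating it with phi::CPUContext selects the plain serial loop. Call shape only (assumes Paddle headers; gamma_functor is the per-element sampler built earlier in that file):

platform::ForRange<phi::CPUContext> for_range(dev_ctx, alpha->numel());
for_range(gamma_functor);  // invokes gamma_functor(i) for each index i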
...@@ -91,7 +91,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) { ...@@ -91,7 +91,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(Dropout, CPUDense) { TEST(Dropout, CPUDense) {
f::Scope scope; f::Scope scope;
p::CPUPlace place; p::CPUPlace place;
p::CPUDeviceContext ctx(place); phi::CPUContext ctx(place);
Compare(scope, ctx); Compare(scope, ctx);
} }
......
...@@ -164,19 +164,15 @@ REGISTER_OPERATOR(eig, ...@@ -164,19 +164,15 @@ REGISTER_OPERATOR(eig,
REGISTER_OPERATOR(eig_grad, ops::EigGradOp); REGISTER_OPERATOR(eig_grad, ops::EigGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(eig,
eig, ops::EigKernel<phi::CPUContext, float, complex64>,
ops::EigKernel<paddle::platform::CPUDeviceContext, float, complex64>, ops::EigKernel<phi::CPUContext, double, complex128>,
ops::EigKernel<paddle::platform::CPUDeviceContext, double, complex128>, ops::EigKernel<phi::CPUContext, complex64, complex64>,
ops::EigKernel<paddle::platform::CPUDeviceContext, complex64, complex64>, ops::EigKernel<phi::CPUContext, complex128, complex128>);
ops::EigKernel<paddle::platform::CPUDeviceContext, complex128, complex128>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
eig_grad, eig_grad,
ops::EigGradKernel<paddle::platform::CPUDeviceContext, float, complex64>, ops::EigGradKernel<phi::CPUContext, float, complex64>,
ops::EigGradKernel<paddle::platform::CPUDeviceContext, double, complex128>, ops::EigGradKernel<phi::CPUContext, double, complex128>,
ops:: ops::EigGradKernel<phi::CPUContext, complex64, complex64>,
EigGradKernel<paddle::platform::CPUDeviceContext, complex64, complex64>, ops::EigGradKernel<phi::CPUContext, complex128, complex128>);
ops::EigGradKernel<paddle::platform::CPUDeviceContext,
complex128,
complex128>);
...@@ -70,7 +70,7 @@ void TransposeTwoAxis(const Tensor& input, ...@@ -70,7 +70,7 @@ void TransposeTwoAxis(const Tensor& input,
permute[axis2] = axis1; permute[axis2] = axis1;
transposed_input->mutable_data<T>(input.dims(), context.GetPlace()); transposed_input->mutable_data<T>(input.dims(), context.GetPlace());
auto& dev_ctx = context.template device_context<platform::CPUDeviceContext>(); auto& dev_ctx = context.template device_context<phi::CPUContext>();
TransCompute<DeviceContext, T>( TransCompute<DeviceContext, T>(
input.dims().size(), dev_ctx, input, transposed_input, permute); input.dims().size(), dev_ctx, input, transposed_input, permute);
......
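Inside a function that is itself templated on `DeviceContext`, `device_context<...>()` is a dependent member template, so the `template` disambiguator in the hunk above is required. A sketch of that idiom, assuming `TransCompute` from transpose_op.h and hard-coding the CPU context as the diff does (presumably because eigvals only builds CPU kernels; the function name here is illustrative):

```cpp
template <typename DeviceContext, typename T>
void TransposeTwoAxisSketch(const paddle::framework::ExecutionContext& context,
                            const paddle::framework::Tensor& input,
                            paddle::framework::Tensor* transposed,
                            const std::vector<int>& permute) {
  transposed->mutable_data<T>(input.dims(), context.GetPlace());
  // Dependent name: the `template` keyword is mandatory here.
  auto& dev_ctx = context.template device_context<phi::CPUContext>();
  paddle::operators::TransCompute<DeviceContext, T>(
      input.dims().size(), dev_ctx, input, transposed, permute);
}
```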
...@@ -86,10 +86,9 @@ REGISTER_OPERATOR(eigvals, ...@@ -86,10 +86,9 @@ REGISTER_OPERATOR(eigvals,
ops::EigvalsOp, ops::EigvalsOp,
ops::EigvalsOpMaker, ops::EigvalsOpMaker,
ops::EigvalsOpVarTypeInference); ops::EigvalsOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(eigvals, REGISTER_OP_CPU_KERNEL(
ops::EigvalsKernel<plat::CPUDeviceContext, float>, eigvals,
ops::EigvalsKernel<plat::CPUDeviceContext, double>, ops::EigvalsKernel<phi::CPUContext, float>,
ops::EigvalsKernel<plat::CPUDeviceContext, ops::EigvalsKernel<phi::CPUContext, double>,
paddle::platform::complex<float>>, ops::EigvalsKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::EigvalsKernel<plat::CPUDeviceContext, ops::EigvalsKernel<phi::CPUContext, paddle::platform::complex<double>>);
paddle::platform::complex<double>>);
...@@ -151,24 +151,23 @@ REGISTER_OPERATOR(eigvalsh, ...@@ -151,24 +151,23 @@ REGISTER_OPERATOR(eigvalsh,
ops::EigvalshGradOpMaker<paddle::imperative::OpBase>); ops::EigvalshGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(eigvalsh_grad, ops::EigvalshGradOp); REGISTER_OPERATOR(eigvalsh_grad, ops::EigvalshGradOp);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(eigvalsh,
eigvalsh, ops::EigvalshKernel<phi::CPUContext, float, float>,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, float, float>, ops::EigvalshKernel<phi::CPUContext, double, double>,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, double, double>, ops::EigvalshKernel<phi::CPUContext,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, float,
float, paddle::platform::complex<float>>,
paddle::platform::complex<float>>, ops::EigvalshKernel<phi::CPUContext,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, double,
double, paddle::platform::complex<double>>);
paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
eigvalsh_grad, eigvalsh_grad,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, float, float>, ops::EigvalshGradKernel<phi::CPUContext, float, float>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, double, double>, ops::EigvalshGradKernel<phi::CPUContext, double, double>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, ops::EigvalshGradKernel<phi::CPUContext,
float, float,
paddle::platform::complex<float>>, paddle::platform::complex<float>>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, ops::EigvalshGradKernel<phi::CPUContext,
double, double,
paddle::platform::complex<double>>); paddle::platform::complex<double>>);
...@@ -146,19 +146,17 @@ REGISTER_OPERATOR(expand_as, ...@@ -146,19 +146,17 @@ REGISTER_OPERATOR(expand_as,
REGISTER_OPERATOR(expand_as_grad, REGISTER_OPERATOR(expand_as_grad,
ops::ExpandAsGradOp, ops::ExpandAsGradOp,
ops::ExpandAsGradNoNeedBufVarsInferer); ops::ExpandAsGradNoNeedBufVarsInferer);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(expand_as,
expand_as, ops::ExpandAsKernel<phi::CPUContext, float>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, float>, ops::ExpandAsKernel<phi::CPUContext, double>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, double>, ops::ExpandAsKernel<phi::CPUContext, int>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int>, ops::ExpandAsKernel<phi::CPUContext, int64_t>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int64_t>, ops::ExpandAsKernel<phi::CPUContext, bool>);
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, bool>); REGISTER_OP_CPU_KERNEL(expand_as_grad,
REGISTER_OP_CPU_KERNEL( ops::ExpandAsGradKernel<phi::CPUContext, int>,
expand_as_grad, ops::ExpandAsGradKernel<phi::CPUContext, int64_t>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int>, ops::ExpandAsGradKernel<phi::CPUContext, float>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int64_t>, ops::ExpandAsGradKernel<phi::CPUContext, double>);
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, double>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
expand_as, expand_as,
......
...@@ -280,19 +280,17 @@ REGISTER_OPERATOR(expand_grad, ...@@ -280,19 +280,17 @@ REGISTER_OPERATOR(expand_grad,
ops::ExpandDoubleGradOpMaker<paddle::framework::OpDesc>, ops::ExpandDoubleGradOpMaker<paddle::framework::OpDesc>,
ops::ExpandDoubleGradOpMaker<paddle::imperative::OpBase>, ops::ExpandDoubleGradOpMaker<paddle::imperative::OpBase>,
ops::ExpandGradNoNeedBufVarsInferer); ops::ExpandGradNoNeedBufVarsInferer);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(expand,
expand, ops::ExpandKernel<phi::CPUContext, float>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, float>, ops::ExpandKernel<phi::CPUContext, double>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, double>, ops::ExpandKernel<phi::CPUContext, int>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, int>, ops::ExpandKernel<phi::CPUContext, int64_t>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, int64_t>, ops::ExpandKernel<phi::CPUContext, bool>);
ops::ExpandKernel<paddle::platform::CPUDeviceContext, bool>); REGISTER_OP_CPU_KERNEL(expand_grad,
REGISTER_OP_CPU_KERNEL( ops::ExpandGradKernel<phi::CPUContext, float>,
expand_grad, ops::ExpandGradKernel<phi::CPUContext, double>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, float>, ops::ExpandGradKernel<phi::CPUContext, int>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, double>, ops::ExpandGradKernel<phi::CPUContext, int64_t>);
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
expand, expand,
......
...@@ -62,8 +62,7 @@ class ExponentialOpInferVarType ...@@ -62,8 +62,7 @@ class ExponentialOpInferVarType
}; };
template <typename T> template <typename T>
class ExponentialKernel<platform::CPUDeviceContext, T> class ExponentialKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
: public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
auto *out = ctx.Output<framework::Tensor>("Out"); auto *out = ctx.Output<framework::Tensor>("Out");
...@@ -135,9 +134,8 @@ REGISTER_OPERATOR(exponential_grad, ...@@ -135,9 +134,8 @@ REGISTER_OPERATOR(exponential_grad,
ExponentialGradInferer); ExponentialGradInferer);
REGISTER_OP_CPU_KERNEL(exponential, REGISTER_OP_CPU_KERNEL(exponential,
ops::ExponentialKernel<plat::CPUDeviceContext, float>, ops::ExponentialKernel<phi::CPUContext, float>,
ops::ExponentialKernel<plat::CPUDeviceContext, double>); ops::ExponentialKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(exponential_grad,
exponential_grad, ops::ExponentialGradKernel<phi::CPUContext, float>,
ops::ExponentialGradKernel<plat::CPUDeviceContext, float>, ops::ExponentialGradKernel<phi::CPUContext, double>);
ops::ExponentialGradKernel<plat::CPUDeviceContext, double>);
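exponential_op keeps the kernel as a class template over the device context and specializes it for the CPU; the specialization now names `phi::CPUContext`. A reduced sketch of the idiom (`MyKernel` and its body are illustrative, not part of this PR):

```cpp
#include <algorithm>

// Primary template: declared against a generic DeviceContext.
template <typename DeviceContext, typename T>
class MyKernel;

// CPU specialization, now keyed on phi::CPUContext.
template <typename T>
class MyKernel<phi::CPUContext, T> : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    auto* out = ctx.Output<paddle::framework::Tensor>("Out");
    T* data = out->mutable_data<T>(ctx.GetPlace());
    std::fill_n(data, out->numel(), static_cast<T>(0));  // placeholder body
  }
};
```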
...@@ -23,8 +23,8 @@ namespace paddle { ...@@ -23,8 +23,8 @@ namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> { struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx, void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in, const framework::Tensor* in,
const framework::Tensor* scale, const framework::Tensor* scale,
T max_range, T max_range,
...@@ -39,8 +39,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> { ...@@ -39,8 +39,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
}; };
template <typename T> template <typename T>
struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> { struct ChannelDequantizeFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx, void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in, const framework::Tensor* in,
const framework::Tensor** scales, const framework::Tensor** scales,
const int scale_num, const int scale_num,
...@@ -139,10 +139,10 @@ struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> { ...@@ -139,10 +139,10 @@ struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct DequantizeFunctor<platform::CPUDeviceContext, float>; template struct DequantizeFunctor<phi::CPUContext, float>;
template struct DequantizeFunctor<platform::CPUDeviceContext, double>; template struct DequantizeFunctor<phi::CPUContext, double>;
template struct ChannelDequantizeFunctor<platform::CPUDeviceContext, float>; template struct ChannelDequantizeFunctor<phi::CPUContext, float>;
template struct ChannelDequantizeFunctor<platform::CPUDeviceContext, double>; template struct ChannelDequantizeFunctor<phi::CPUContext, double>;
class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel { class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel {
public: public:
...@@ -269,7 +269,7 @@ Notes: In general, the per-channel quantization is only applied to weights and t ...@@ -269,7 +269,7 @@ Notes: In general, the per-channel quantization is only applied to weights and t
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
fake_dequantize_max_abs, fake_dequantize_max_abs,
......
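fake_dequantize_op follows the functor pattern: the primary template is declared against a generic `DeviceContext`, the CPU specialization is defined against `phi::CPUContext`, and explicit instantiations pin down the types compiled into this translation unit. A condensed sketch (`MyFunctor` is illustrative):

```cpp
// Primary template; each backend provides its own specialization elsewhere.
template <typename DeviceContext, typename T>
struct MyFunctor;

template <typename T>
struct MyFunctor<phi::CPUContext, T> {
  void operator()(const phi::CPUContext& dev_ctx,
                  const paddle::framework::Tensor* in,
                  paddle::framework::Tensor* out) {
    // CPU implementation (Eigen/BLAS) would go here.
  }
};

// Explicit instantiations keep the definitions inside this .cc file,
// exactly as fake_dequantize_op.cc does for float and double.
template struct MyFunctor<phi::CPUContext, float>;
template struct MyFunctor<phi::CPUContext, double>;

// Registration sites shorten the context name the same way:
using CPU = phi::CPUContext;
```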
...@@ -32,8 +32,8 @@ struct Compare { ...@@ -32,8 +32,8 @@ struct Compare {
}; };
template <typename T> template <typename T>
struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> { struct FindAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const T *in, const T *in,
const int num, const int num,
T *out) { T *out) {
...@@ -41,11 +41,11 @@ struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> { ...@@ -41,11 +41,11 @@ struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct FindAbsMaxFunctor<platform::CPUDeviceContext, float>; template struct FindAbsMaxFunctor<phi::CPUContext, float>;
template <typename T> template <typename T>
struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, T> { struct FindChannelAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in_tensor, const framework::Tensor &in_tensor,
const int quant_axis, const int quant_axis,
T *out_abs_max) { T *out_abs_max) {
...@@ -86,11 +86,11 @@ struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, T> { ...@@ -86,11 +86,11 @@ struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, float>; template struct FindChannelAbsMaxFunctor<phi::CPUContext, float>;
template <typename T> template <typename T>
struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { struct ClipAndFakeQuantFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in, const framework::Tensor &in,
const framework::Tensor &scale, const framework::Tensor &scale,
const int bin_cnt, const int bin_cnt,
...@@ -98,7 +98,7 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { ...@@ -98,7 +98,7 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
framework::Tensor *out) { framework::Tensor *out) {
T s = scale.data<T>()[0]; T s = scale.data<T>()[0];
T inv_s = inverse(s); T inv_s = inverse(s);
platform::Transform<platform::CPUDeviceContext> trans; platform::Transform<phi::CPUContext> trans;
if (round_type == 0) { if (round_type == 0) {
trans(ctx, trans(ctx,
in.data<T>(), in.data<T>(),
...@@ -117,11 +117,11 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { ...@@ -117,11 +117,11 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, float>; template struct ClipAndFakeQuantFunctor<phi::CPUContext, float>;
template <typename T> template <typename T>
struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { struct ClipAndFakeQuantDequantFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in, const framework::Tensor &in,
const framework::Tensor &scale, const framework::Tensor &scale,
const int bin_cnt, const int bin_cnt,
...@@ -130,7 +130,7 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { ...@@ -130,7 +130,7 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
T s = scale.data<T>()[0]; T s = scale.data<T>()[0];
T inv_s = inverse(s); T inv_s = inverse(s);
platform::Transform<platform::CPUDeviceContext> trans; platform::Transform<phi::CPUContext> trans;
if (round_type == 0) { if (round_type == 0) {
trans(ctx, trans(ctx,
in.data<T>(), in.data<T>(),
...@@ -151,12 +151,11 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { ...@@ -151,12 +151,11 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
} }
} }
}; };
template struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, template struct ClipAndFakeQuantDequantFunctor<phi::CPUContext, float>;
float>;
template <typename T> template <typename T>
struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { struct ChannelClipAndFakeQuantFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in, const framework::Tensor &in,
const framework::Tensor &scale, const framework::Tensor &scale,
const int bin_cnt, const int bin_cnt,
...@@ -176,7 +175,7 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { ...@@ -176,7 +175,7 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
auto *out_data = out->mutable_data<T>(ctx.GetPlace()); auto *out_data = out->mutable_data<T>(ctx.GetPlace());
auto in_dims = in.dims(); auto in_dims = in.dims();
const int64_t channel = in_dims[quant_axis]; const int64_t channel = in_dims[quant_axis];
platform::Transform<platform::CPUDeviceContext> trans; platform::Transform<phi::CPUContext> trans;
if (quant_axis == 0) { if (quant_axis == 0) {
const int64_t channel_size = in.numel() / channel; const int64_t channel_size = in.numel() / channel;
for (int64_t i = 0; i < channel; i++) { for (int64_t i = 0; i < channel; i++) {
...@@ -235,11 +234,10 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> { ...@@ -235,11 +234,10 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, template struct ChannelClipAndFakeQuantFunctor<phi::CPUContext, float>;
float>;
template <typename T> template <typename T>
struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { struct ChannelClipFakeQuantDequantFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in, const framework::Tensor &in,
const framework::Tensor &scale, const framework::Tensor &scale,
const int bin_cnt, const int bin_cnt,
...@@ -258,7 +256,7 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { ...@@ -258,7 +256,7 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
auto *out_data = out->mutable_data<T>(ctx.GetPlace()); auto *out_data = out->mutable_data<T>(ctx.GetPlace());
auto in_dims = in.dims(); auto in_dims = in.dims();
const int64_t channel = in_dims[quant_axis]; const int64_t channel = in_dims[quant_axis];
platform::Transform<platform::CPUDeviceContext> trans; platform::Transform<phi::CPUContext> trans;
if (quant_axis == 0) { if (quant_axis == 0) {
const int64_t channel_size = in.numel() / channel; const int64_t channel_size = in.numel() / channel;
for (int i = 0; i < channel; i++) { for (int i = 0; i < channel; i++) {
...@@ -326,11 +324,10 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> { ...@@ -326,11 +324,10 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, template struct ChannelClipFakeQuantDequantFunctor<phi::CPUContext, float>;
float>;
template <typename T> template <typename T>
struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> { struct FindRangeAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &cur_scale, const framework::Tensor &cur_scale,
const framework::Tensor &last_scale, const framework::Tensor &last_scale,
const framework::Tensor &iter, const framework::Tensor &iter,
...@@ -349,18 +346,17 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> { ...@@ -349,18 +346,17 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
max = cur; max = cur;
} else if (fabs(removed - max) < 1e-6) { } else if (fabs(removed - max) < 1e-6) {
int size = (it > window_size) ? window_size : it; int size = (it > window_size) ? window_size : it;
FindAbsMaxFunctor<platform::CPUDeviceContext, T>()( FindAbsMaxFunctor<phi::CPUContext, T>()(ctx, scale_arr, size, &max);
ctx, scale_arr, size, &max);
} }
out_scale->mutable_data<T>(ctx.GetPlace())[0] = max; out_scale->mutable_data<T>(ctx.GetPlace())[0] = max;
} }
}; };
template struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, float>; template struct FindRangeAbsMaxFunctor<phi::CPUContext, float>;
template <typename T> template <typename T>
struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> { struct FindMovingAverageAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const platform::CPUDeviceContext &ctx, void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in_accum, const framework::Tensor &in_accum,
const framework::Tensor &in_state, const framework::Tensor &in_state,
const T *cur_scale, const T *cur_scale,
...@@ -382,8 +378,7 @@ struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> { ...@@ -382,8 +378,7 @@ struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
} }
}; };
template struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, template struct FindMovingAverageAbsMaxFunctor<phi::CPUContext, float>;
float>;
class FakeQuantOrWithDequantAbsMaxOp : public framework::OperatorWithKernel { class FakeQuantOrWithDequantAbsMaxOp : public framework::OperatorWithKernel {
public: public:
...@@ -968,7 +963,7 @@ class StrightThroughEstimatorMaker : public framework::SingleGradOpMaker<T> { ...@@ -968,7 +963,7 @@ class StrightThroughEstimatorMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext; using CPU = phi::CPUContext;
REGISTER_OPERATOR( REGISTER_OPERATOR(
fake_quantize_abs_max, fake_quantize_abs_max,
......
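The quantization functors above lean on `platform::Transform`, which applies an elementwise unary op over a range on the given context and is now instantiated as `platform::Transform<phi::CPUContext>`. A minimal sketch of the clip step; `ClipFn` and `ClipToScale` are illustrative stand-ins for the clipping functor the real kernels use:

```cpp
#include "paddle/fluid/platform/transform.h"

// Clip each element to [-s, s].
template <typename T>
struct ClipFn {
  T lo_, hi_;
  T operator()(const T& x) const { return x < lo_ ? lo_ : (x > hi_ ? hi_ : x); }
};

template <typename T>
void ClipToScale(const phi::CPUContext& ctx, const paddle::framework::Tensor& in,
                 T s, paddle::framework::Tensor* out) {
  paddle::platform::Transform<phi::CPUContext> trans;  // was CPUDeviceContext
  trans(ctx, in.data<T>(), in.data<T>() + in.numel(),
        out->mutable_data<T>(ctx.GetPlace()), ClipFn<T>{-s, s});
}
```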
...@@ -223,7 +223,6 @@ REGISTER_OPERATOR( ...@@ -223,7 +223,6 @@ REGISTER_OPERATOR(
ops::FCOpMaker, ops::FCOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(fc,
fc, ops::FCOpKernel<phi::CPUContext, float>,
ops::FCOpKernel<paddle::platform::CPUDeviceContext, float>, ops::FCOpKernel<phi::CPUContext, double>);
ops::FCOpKernel<paddle::platform::CPUDeviceContext, double>);
(29 additional file diffs in this commit are collapsed and not shown.)