Unverified commit 755438a7, authored by Leo Chen, committed by GitHub

unify cpu context, part2 (#44012)

* fix init()

* delete test_device_context

* replace CPUDeviceContext with CPUContext

* fix test_scalar

* remove dot_op.cc

* fix compile
Parent 09096aeb
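
For reference, below is a minimal C++ sketch of the recurring replacement pattern in this diff: device contexts that were previously typed as paddle::platform::CPUDeviceContext (whether fetched from the DeviceContextPool or constructed locally) are now typed as phi::CPUContext, and templated helpers such as phi::funcs::GetBlas are instantiated on phi::CPUContext. The include paths and the example::AxpyOnCpu wrapper are illustrative assumptions for this sketch, not part of the commit.

    // Sketch only; assumes a Paddle build where these headers are available.
    #include "paddle/fluid/platform/device_context.h"   // DeviceContextPool, CPUPlace
    #include "paddle/phi/kernels/funcs/blas/blas.h"      // phi::funcs::GetBlas

    namespace example {  // hypothetical namespace, for illustration only

    template <typename T>
    void AxpyOnCpu(int64_t n, const T* x, T* y) {
      // Before this commit the cast target here was platform::CPUDeviceContext;
      // after the unification it is the phi::CPUContext type.
      auto* ctx = static_cast<phi::CPUContext*>(
          paddle::platform::DeviceContextPool::Instance().Get(
              paddle::platform::CPUPlace()));
      // BLAS helpers are likewise instantiated on phi::CPUContext now.
      auto blas = phi::funcs::GetBlas<phi::CPUContext, T>(*ctx);
      blas.AXPY(n, static_cast<T>(1.0), x, y);  // y += x, as in TensorAddFunctor
    }

    }  // namespace example
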
......@@ -251,7 +251,7 @@ void EagerGroup::ConcatTensors(const platform::Place &place) {
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
auto *default_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(place));
ConcatTensorsWithType(
*default_ctx, dense_tensors_, &dense_contents_, dtype_);
......@@ -274,7 +274,7 @@ void EagerGroup::SplitTensors(const platform::Place &place) {
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
auto *default_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(place));
SplitTensorsWithType(
*default_ctx, &dense_contents_, &dense_tensors_, dtype_);
......@@ -891,7 +891,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(inner_place_)) {
dev_ctx = static_cast<platform::CPUDeviceContext *>(
dev_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(inner_place_));
} else {
PADDLE_THROW(platform::errors::Unimplemented(
......
......@@ -31,9 +31,9 @@ namespace paddle {
namespace distributed {
template <typename T>
inline phi::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
paddle::platform::CPUDeviceContext cpu_ctx;
return phi::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
inline phi::funcs::BlasT<phi::CPUContext, T> GetBlas() {
phi::CPUContext cpu_ctx;
return phi::funcs::GetBlas<phi::CPUContext, T>(cpu_ctx);
}
template <typename T>
......
......@@ -353,11 +353,12 @@ void Communicator::RpcRecvSparse(const std::string &varname,
bool training = true;
auto status = _worker_ptr->PullSparseParam((float **)push_g_vec.data(),
table_id, // NOLINT
sparse_push_keys.data(),
sparse_push_keys.size(),
training);
auto status =
_worker_ptr->PullSparseParam(static_cast<float **>(push_g_vec.data()),
table_id,
sparse_push_keys.data(),
sparse_push_keys.size(),
training);
status.wait();
return;
}
......@@ -1184,12 +1185,12 @@ void GeoCommunicator::SendDense(const CommContext &send_ctx) {
auto &t_latest = var_latest->Get<framework::LoDTensor>();
auto t_timestamp = var_timestamp->GetMutable<framework::LoDTensor>();
paddle::platform::CPUDeviceContext cpu_ctx;
phi::CPUContext cpu_ctx;
auto *var_delta = delta_scope_->Var(varname);
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest.dims(), cpu_ctx.GetPlace());
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
blas.VSUB(t_latest.numel(),
t_latest.data<float>(),
t_timestamp->data<float>(),
......@@ -1218,7 +1219,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
RpcRecvDense(varnames, table_id, pserver_scope_.get());
// 2.1 pserver - old => delta; 2.2 latest + old => latest 2.3 old => pserver
paddle::platform::CPUDeviceContext cpu_ctx;
phi::CPUContext cpu_ctx;
for (auto &varname : varnames) {
auto *var_latest = recv_scope_->FindVar(varname);
auto t_latest = var_latest->GetMutable<framework::LoDTensor>();
......@@ -1233,7 +1234,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest->dims(), cpu_ctx.GetPlace());
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
blas.VSUB(t_latest->numel(),
t_pserver.data<float>(),
t_old->data<float>(),
......@@ -1334,7 +1335,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
auto *t_old = var_old->GetMutable<framework::LoDTensor>();
auto dims1 = t_latest.dims()[1];
paddle::platform::CPUDeviceContext cpu_ctx;
phi::CPUContext cpu_ctx;
auto *var_delta = delta_scope_->Var(varname);
auto *t_delta = var_delta->GetMutable<phi::SelectedRows>();
......@@ -1345,7 +1346,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
t_delta->set_rows(sparse_ids);
t_delta->set_height(t_latest.dims()[0]);
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
float coefficient = 1.0 / static_cast<float>(trainers_);
std::vector<float *> push_g_vec;
......@@ -1419,8 +1420,8 @@ void GeoCommunicator::RecvSparse(const std::string &varname,
std::vector<float> v_delta;
v_delta.resize(numel);
paddle::platform::CPUDeviceContext cpu_ctx;
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
phi::CPUContext cpu_ctx;
auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
for (auto j = 0; j < static_cast<int>(keys.size()); ++j) {
VLOG(5) << "DEBUG GeoCommunicator::RecvSparse recv sparse key" << keys[j]
......
......@@ -185,9 +185,8 @@ inline void MergeVars(const std::string &var_name,
}
// set output tensor to 0.
paddle::platform::CPUDeviceContext cpu_ctx;
phi::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
constant_functor;
phi::CPUContext cpu_ctx;
phi::funcs::SetConstant<phi::CPUContext, T> constant_functor;
constant_functor(cpu_ctx, out_t, static_cast<T>(0));
// sum all vars to out
auto result = EigenVector<T>::Flatten(*out_t);
......@@ -210,16 +209,13 @@ inline void MergeVars(const std::string &var_name,
for (auto &var : vars) {
inputs.push_back(&var->Get<phi::SelectedRows>());
}
paddle::platform::CPUDeviceContext dev_ctx;
phi::CPUContext dev_ctx;
if (merge_add) {
paddle::operators::math::scatter::
MergeAdd<paddle::platform::CPUDeviceContext, T>
merge_add;
paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_add;
merge_add(dev_ctx, inputs, out_slr);
} else {
paddle::operators::math::scatter::
MergeAverage<paddle::platform::CPUDeviceContext, T>
merge_average;
paddle::operators::math::scatter::MergeAverage<phi::CPUContext, T>
merge_average;
merge_average(dev_ctx, inputs, out_slr);
}
......
......@@ -48,8 +48,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor) {
#endif
return;
}
paddle::framework::details::tensor_check<
paddle::platform::CPUDeviceContext>(
paddle::framework::details::tensor_check<phi::CPUContext>(
api_name, tensor_name, *dense_tensor, place);
}
}
......
......@@ -90,9 +90,8 @@ REGISTER_OP_WITHOUT_GRADIENT(
test_op,
paddle::framework::TestOpWithKernel,
paddle::framework::OpKernelTestProtoAndCheckerMaker);
REGISTER_OP_CPU_KERNEL(
test_op,
paddle::framework::TestKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(test_op,
paddle::framework::TestKernel<phi::CPUContext, float>);
REGISTER_OP_CUDA_KERNEL(
test_op,
paddle::framework::TestKernel<paddle::platform::CUDADeviceContext, float>);
......
......@@ -44,8 +44,8 @@ void CastDataLayout::apply() {
auto place = ctx_->GetPlace();
if (platform::is_cpu_place(place)) {
phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans4;
auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
phi::funcs::Transpose<phi::CPUContext, T, 4> trans4;
auto* context = static_cast<const phi::CPUContext*>(ctx_);
trans4(*context, in_, out_, axis_);
} else {
PADDLE_THROW(platform::errors::PreconditionNotMet(
......
......@@ -94,8 +94,8 @@ struct CastDataType {
auto* out_begin = out_->mutable_data<OutType>(in_.place());
if (platform::is_cpu_place(in_.place())) {
platform::Transform<platform::CPUDeviceContext> trans;
auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
platform::Transform<phi::CPUContext> trans;
auto* context = static_cast<const phi::CPUContext*>(ctx_);
trans(*context,
in_begin,
in_end,
......
......@@ -117,7 +117,7 @@ struct TestBroadcastOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
place_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
ctxs_.emplace_back(new phi::CPUContext(p));
}
#if defined(PADDLE_WITH_XPU_BKCL)
bkcl_ctxs_.reset(nullptr);
......
......@@ -69,7 +69,7 @@ struct TestGatherOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
ctxs_.emplace_back(new phi::CPUContext(p));
}
}
}
......
......@@ -316,7 +316,7 @@ template <>
template <>
template <typename T>
void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
void TensorCheckerVisitor<phi::CPUContext>::apply(
typename std::enable_if<
std::is_floating_point<T>::value ||
std::is_same<T, ::paddle::platform::complex<float>>::value ||
......@@ -329,11 +329,11 @@ void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
}
template <>
void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
const std::string& var_name,
const framework::Tensor& tensor,
const platform::Place& place) {
TensorCheckerVisitor<platform::CPUDeviceContext> vistor(
void tensor_check<phi::CPUContext>(const std::string& op_type,
const std::string& var_name,
const framework::Tensor& tensor,
const platform::Place& place) {
TensorCheckerVisitor<phi::CPUContext> vistor(
op_type, var_name, tensor, place);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
......@@ -439,7 +439,7 @@ void CheckVarHasNanOrInf(const std::string& op_type,
#endif
return;
}
tensor_check<platform::CPUDeviceContext>(op_type, var_name, *tensor, place);
tensor_check<phi::CPUContext>(op_type, var_name, *tensor, place);
}
void CheckVarHasNanOrInf(const std::string& op_type,
......
......@@ -81,7 +81,7 @@ struct TestReduceOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
gpu_list_.push_back(p);
ctxs_.emplace_back(new p::CPUDeviceContext(p));
ctxs_.emplace_back(new phi::CPUContext(p));
}
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
nccl_ctxs_.reset(nullptr);
......
......@@ -144,7 +144,7 @@ LoDTensor LodExpand(const LoDTensor& source,
auto slice = tensor.Slice(elem, elem + 1);
TensorCopy(source.Slice(ins, ins + 1),
platform::CPUPlace(),
platform::CPUDeviceContext(),
phi::CPUContext(),
&slice);
}
}
......
......@@ -232,9 +232,8 @@ class OpKernelTest : public paddle::framework::OpKernel<T> {
REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel,
paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestMaker);
REGISTER_OP_CPU_KERNEL(
op_with_kernel,
paddle::framework::OpKernelTest<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(op_with_kernel,
paddle::framework::OpKernelTest<phi::CPUContext, float>);
REGISTER_OP_CUDA_KERNEL(
op_with_kernel,
......@@ -264,10 +263,9 @@ TEST(OperatorRegistrar, CUDA) {
}
static int op_test_value = 0;
using paddle::platform::CPUDeviceContext;
using paddle::platform::CUDADeviceContext;
using paddle::platform::DeviceContext;
using phi::CPUContext;
namespace paddle {
namespace framework {
......@@ -295,8 +293,7 @@ class OpMultiKernelTest : public paddle::framework::OpKernel<T> {
};
template <typename T>
class OpMultiKernelTest<CPUDeviceContext, T>
: public paddle::framework::OpKernel<T> {
class OpMultiKernelTest<CPUContext, T> : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const {
++op_test_value;
......@@ -319,7 +316,7 @@ class OpMultiKernelTest2 : public paddle::framework::OpKernel<T> {
};
template <typename T>
class OpMultiKernelTest2<CPUDeviceContext, T>
class OpMultiKernelTest2<CPUContext, T>
: public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const {
......@@ -342,16 +339,14 @@ class OpMultiKernelTest2<CUDADeviceContext, T>
REGISTER_OP_WITHOUT_GRADIENT(op_with_multi_kernel,
paddle::framework::OpWithMultiKernelTest,
paddle::framework::OpKernelTestMaker);
REGISTER_OP_KERNEL(
op_with_multi_kernel,
CPU,
paddle::platform::CPUPlace,
paddle::framework::OpMultiKernelTest<CPUDeviceContext, float>);
REGISTER_OP_KERNEL(
op_with_multi_kernel,
MKLDNN,
paddle::platform::CPUPlace,
paddle::framework::OpMultiKernelTest2<CPUDeviceContext, float>);
REGISTER_OP_KERNEL(op_with_multi_kernel,
CPU,
paddle::platform::CPUPlace,
paddle::framework::OpMultiKernelTest<CPUContext, float>);
REGISTER_OP_KERNEL(op_with_multi_kernel,
MKLDNN,
paddle::platform::CPUPlace,
paddle::framework::OpMultiKernelTest2<CPUContext, float>);
REGISTER_OP_KERNEL(
op_with_multi_kernel,
CUDA,
......
......@@ -420,16 +420,13 @@ REGISTER_OP_WITHOUT_GRADIENT(
REGISTER_OP_CPU_KERNEL(
indicate_lod_tensor_data_type_test,
paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
int>);
paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
REGISTER_OP_CPU_KERNEL(
indicate_selected_rows_data_type_test,
paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
int>);
paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
REGISTER_OP_CPU_KERNEL(
indicate_other_data_type_test,
paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
int>);
paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
TEST(IndicateVarDataTypeTest, lodtensor) {
paddle::framework::InitDevices();
......@@ -599,16 +596,14 @@ REGISTER_OP_WITHOUT_GRADIENT(get_lod_level_test,
paddle::framework::GetSetLoDLevelTestMaker);
REGISTER_OP_CPU_KERNEL(
get_lod_level_test,
paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
float>);
paddle::framework::EmptyTestKernel<phi::CPUContext, float>);
REGISTER_OP_WITHOUT_GRADIENT(set_lod_level_test,
paddle::framework::SetLoDLevelTest,
paddle::framework::GetSetLoDLevelTestMaker);
REGISTER_OP_CPU_KERNEL(
set_lod_level_test,
paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
float>);
paddle::framework::EmptyTestKernel<phi::CPUContext, float>);
void SetGetLoDLevelTestMain(std::string op_type) {
paddle::framework::InitDevices({});
......
......@@ -66,7 +66,7 @@ struct ConvertToPhiContext {
};
template <>
struct ConvertToPhiContext<platform::CPUDeviceContext> {
struct ConvertToPhiContext<phi::CPUContext> {
using TYPE = phi::CPUContext;
};
......
......@@ -53,7 +53,7 @@ TEST_F(SelectedRowsTester, complete_dims) {
TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
phi::SelectedRows dst_tensor;
platform::CPUDeviceContext cpu_ctx(place_);
phi::CPUContext cpu_ctx(place_);
std::ostringstream oss;
SerializeToStream(oss, *selected_rows_, cpu_ctx);
......
......@@ -1253,7 +1253,7 @@ void TensorFromStream(std::istream& is,
is.seekg(seekg, is.cur);
void* buf;
platform::CPUDeviceContext ctx;
phi::CPUContext ctx;
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
platform::is_xpu_place(dev_ctx.GetPlace()) ||
......@@ -1336,7 +1336,7 @@ void TensorFromStream(std::istream& is,
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
tensor->Resize(phi::make_ddim(dims));
void* buf;
platform::CPUDeviceContext ctx;
phi::CPUContext ctx;
size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
platform::is_xpu_place(dev_ctx.GetPlace()) ||
......
......@@ -24,7 +24,7 @@ namespace framework {
TEST(TensorCopy, Tensor) {
Tensor src_tensor;
Tensor dst_tensor;
platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));
phi::CPUContext cpu_ctx((platform::CPUPlace()));
int* src_ptr = src_tensor.mutable_data<int>(phi::make_ddim({3, 3}),
platform::CPUPlace());
......@@ -164,7 +164,7 @@ TEST(TensorFromVector, Tensor) {
// Copy to CPU Tensor
cpu_tensor.Resize(phi::make_ddim({3, 3}));
auto cpu_place = new paddle::platform::CPUPlace();
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
phi::CPUContext cpu_ctx(*cpu_place);
paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
// Copy to GPUTensor
......@@ -255,20 +255,23 @@ TEST(TensorToVector, Tensor) {
#endif
}
TEST(TensorToVector, Tensor_bool){{paddle::framework::Tensor src;
bool* src_ptr = src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
for (int i = 0; i < 3 * 3; ++i) {
src_ptr[i] = static_cast<bool>(i % 2);
}
TEST(TensorToVector, Tensor_bool) {
{
paddle::framework::Tensor src;
bool* src_ptr = src.mutable_data<bool>({3, 3}, paddle::platform::CPUPlace());
for (int i = 0; i < 3 * 3; ++i) {
src_ptr[i] = static_cast<bool>(i % 2);
}
paddle::platform::CPUPlace place;
std::vector<bool> dst;
paddle::framework::TensorToVector<bool>(src, &dst);
paddle::platform::CPUPlace place;
std::vector<bool> dst;
paddle::framework::TensorToVector<bool>(src, &dst);
for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_ptr[i], dst[i]);
for (int i = 0; i < 3 * 3; ++i) {
EXPECT_EQ(src_ptr[i], dst[i]);
}
}
} // namespace framework
#ifdef PADDLE_WITH_CUDA
{
std::vector<bool> src_vec = {
......@@ -325,7 +328,7 @@ for (int i = 0; i < 3 * 3; ++i) {
}
}
#endif
} // namespace paddle
}
TEST(TensorFromDLPack, Tensor) {
{
......@@ -334,7 +337,7 @@ TEST(TensorFromDLPack, Tensor) {
cpu_tensor.Resize(phi::make_ddim({3, 3}));
paddle::platform::CPUPlace cpu_place;
paddle::platform::CPUDeviceContext cpu_ctx(cpu_place);
phi::CPUContext cpu_ctx(cpu_place);
paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
paddle::framework::DLPackTensor dlpack_tensor(cpu_tensor, 1);
......@@ -360,7 +363,7 @@ TEST(TensorFromDLPack, Tensor) {
// Copy to CPU Tensor
cpu_tensor.Resize(phi::make_ddim({3, 3}));
paddle::platform::CPUPlace cpu_place;
paddle::platform::CPUDeviceContext cpu_ctx(cpu_place);
phi::CPUContext cpu_ctx(cpu_place);
paddle::framework::TensorFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
// Copy to GPUTensor
......@@ -502,7 +505,7 @@ TEST(Tensor, FromAndToStream) {
{
framework::Tensor dst_tensor;
auto place = new platform::CPUPlace();
platform::CPUDeviceContext cpu_ctx(*place);
phi::CPUContext cpu_ctx(*place);
std::ostringstream oss;
TensorToStream(oss, src_tensor, cpu_ctx);
......
......@@ -46,8 +46,8 @@ void GLOOParallelContext::Init() {
int port = std::stoi(addr[1]);
gloo_wrapper->SetHttpStore(host, port, "worker");
gloo_wrapper->Init();
device_ = std::unique_ptr<platform::CPUDeviceContext>(
new platform::CPUDeviceContext(platform::CPUPlace()));
device_ = std::unique_ptr<phi::CPUContext>(
new phi::CPUContext(platform::CPUPlace()));
device_->SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(platform::CPUPlace())
.get());
......@@ -200,7 +200,7 @@ void GLOOParallelContext::Broadcast(framework::Variable *src, int ring_id) {
paddle::platform::DeviceContext *GLOOParallelContext::GetDeviceContext(
int ring_id) {
// return the CPUDeviceContext
// return the CPUContext
return device_.get();
}
......
......@@ -64,7 +64,7 @@ class GLOOParallelContext : public ParallelContext {
void AllReduce(const phi::SelectedRows& src, phi::SelectedRows* dst);
private:
std::unique_ptr<platform::CPUDeviceContext> device_;
std::unique_ptr<phi::CPUContext> device_;
};
} // namespace imperative
......
......@@ -85,9 +85,9 @@ class TensorAddFunctor : public boost::static_visitor<> {
: numel_(numel), x_(x), y_(y) {}
void operator()(const platform::CPUPlace& place) const {
platform::CPUDeviceContext* ctx = dynamic_cast<platform::CPUDeviceContext*>(
phi::CPUContext* ctx = dynamic_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(place));
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, T>(*ctx);
auto blas = phi::funcs::GetBlas<phi::CPUContext, T>(*ctx);
blas.AXPY(numel_, 1., x_, y_);
}
......@@ -438,7 +438,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
place));
#endif
} else if (platform::is_cpu_place(place)) {
return TensorAddImpl<platform::CPUDeviceContext, platform::float16>(
return TensorAddImpl<phi::CPUContext, platform::float16>(
src_tensor, dst_tensor, place);
}
}
......@@ -455,7 +455,7 @@ void TensorAdd(const VarType& src, VarType* dst) {
place));
#endif
} else if (platform::is_cpu_place(place)) {
return TensorAddImpl<platform::CPUDeviceContext, platform::bfloat16>(
return TensorAddImpl<phi::CPUContext, platform::bfloat16>(
src_tensor, dst_tensor, place);
}
}
......@@ -498,8 +498,8 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CUDADeviceContext, double);
} else {
#endif
PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CPUDeviceContext, float);
PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(platform::CPUDeviceContext, double);
PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(phi::CPUContext, float);
PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(phi::CPUContext, double);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
}
#endif
......@@ -550,8 +550,8 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CUDADeviceContext, double);
} else {
#endif
PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CPUDeviceContext, float);
PADDLE_SELECTED_ROWS_ADD_TENSOR(platform::CPUDeviceContext, double);
PADDLE_SELECTED_ROWS_ADD_TENSOR(phi::CPUContext, float);
PADDLE_SELECTED_ROWS_ADD_TENSOR(phi::CPUContext, double);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
}
#endif
......@@ -613,8 +613,8 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
PADDLE_SELECTED_ROWS_ADD(platform::CUDADeviceContext, double);
} else {
#endif
PADDLE_SELECTED_ROWS_ADD(platform::CPUDeviceContext, float);
PADDLE_SELECTED_ROWS_ADD(platform::CPUDeviceContext, double);
PADDLE_SELECTED_ROWS_ADD(phi::CPUContext, float);
PADDLE_SELECTED_ROWS_ADD(phi::CPUContext, double);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
}
#endif
......
......@@ -53,12 +53,11 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) {
}
framework::VisitDataTypeForHIP(
dtype_,
DivNRanksForAllReduce<platform::CPUDeviceContext>(
tensor, nranks, context));
DivNRanksForAllReduce<phi::CPUContext>(tensor, nranks, context));
#else
framework::VisitDataType(dtype_,
DivNRanksForAllReduce<platform::CPUDeviceContext>(
tensor, nranks, context));
framework::VisitDataType(
dtype_,
DivNRanksForAllReduce<phi::CPUContext>(tensor, nranks, context));
#endif
VLOG(4) << "after div 2" << *tensor;
} else if (platform::is_xpu_place(tensor->place())) {
......@@ -328,11 +327,10 @@ void Group::ConcatTensors(const platform::DeviceContext &context) {
"Please recompile or reinstall Paddle with CNCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
ConcatTensorsWithType(
static_cast<const platform::CPUDeviceContext &>(context),
dense_tensors_,
&dense_contents_,
dtype_);
ConcatTensorsWithType(static_cast<const phi::CPUContext &>(context),
dense_tensors_,
&dense_contents_,
dtype_);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Concat grad tensor not supported on place (%s)", place));
......@@ -390,11 +388,10 @@ void Group::SplitTensors(const platform::DeviceContext &context) {
"Please recompile or reinstall Paddle with CNCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
SplitTensorsWithType(
static_cast<const platform::CPUDeviceContext &>(context),
&dense_contents_,
&dense_tensors_,
dtype_);
SplitTensorsWithType(static_cast<const phi::CPUContext &>(context),
&dense_contents_,
&dense_tensors_,
dtype_);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Split grad tensor not supported on place (%s)", place));
......
......@@ -234,7 +234,7 @@ void LiteSubgraphPass::SetUpEngine(
framework::Scope* scope,
const std::vector<std::string>& params) {
std::ostringstream os;
platform::CPUDeviceContext ctx;
phi::CPUContext ctx;
for (const auto& param : params) {
VLOG(3) << "Serialize param: " << param;
PADDLE_ENFORCE_NOT_NULL(
......
......@@ -365,7 +365,7 @@ void ConvertToMixedPrecision(const std::string& model_file,
[](framework::Scope* scope,
const std::vector<std::string>& params) -> std::string {
std::ostringstream os;
platform::CPUDeviceContext ctx;
phi::CPUContext ctx;
for (const auto& param : params) {
VLOG(3) << "Serialize param: " << param;
PADDLE_ENFORCE_NOT_NULL(
......
......@@ -81,7 +81,7 @@ void make_fake_model(std::string* model, std::string* param) {
ctx.PartialInitWithAllocator();
#else
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
phi::CPUContext ctx(place);
#endif
// Prepare variables.
std::vector<std::string> repetitive_params{"x", "y"};
......
......@@ -62,7 +62,7 @@ void IOConverterTester(const platform::DeviceContext& ctx) {
TEST(EngineIOConverterTester, DefaultCPU) {
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
phi::CPUContext ctx(place);
IOConverterTester(ctx);
}
......
......@@ -1469,20 +1469,16 @@ namespace plat = paddle::platform;
ops::ActivationOpGrad, \
ops::ActivationGradOpInplaceInferer);
#define REGISTER_ACTIVATION_CPU_KERNEL( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, \
ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
ops::functor<float>>, \
ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
ops::functor<double>>); \
REGISTER_OP_CPU_KERNEL( \
act_type##_grad, \
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
ops::grad_functor<float>>, \
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
ops::grad_functor<double>>);
#define REGISTER_ACTIVATION_CPU_KERNEL( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, \
ops::ActivationKernel<phi::CPUContext, ops::functor<float>>, \
ops::ActivationKernel<phi::CPUContext, ops::functor<double>>); \
REGISTER_OP_CPU_KERNEL( \
act_type##_grad, \
ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<float>>, \
ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<double>>);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
......
......@@ -122,12 +122,11 @@ REGISTER_OPERATOR(
ops::AddPositionEncodingGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(add_position_encoding_grad, ops::AddPositionEncodingOpGrad);
REGISTER_OP_CPU_KERNEL(
add_position_encoding,
ops::AddPositionEncodingKernel<plt::CPUDeviceContext, float>,
ops::AddPositionEncodingKernel<plt::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(add_position_encoding,
ops::AddPositionEncodingKernel<phi::CPUContext, float>,
ops::AddPositionEncodingKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(
add_position_encoding_grad,
ops::AddPositionEncodingGradKernel<plt::CPUDeviceContext, float>,
ops::AddPositionEncodingGradKernel<plt::CPUDeviceContext, double>);
ops::AddPositionEncodingGradKernel<phi::CPUContext, float>,
ops::AddPositionEncodingGradKernel<phi::CPUContext, double>);
......@@ -342,7 +342,7 @@ DECLARE_INPLACE_OP_INFERER(AffineChannelGradInplaceInferer,
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(affine_channel,
ops::AffineChannelOp,
......
......@@ -28,7 +28,7 @@ namespace operators {
using Tensor = framework::Tensor;
template <typename T>
struct Linspace<paddle::platform::CPUDeviceContext, T> {
struct Linspace<phi::CPUContext, T> {
void operator()(T start,
T end,
int count,
......@@ -282,14 +282,12 @@ REGISTER_OPERATOR(affine_grid,
ops::AffineGridGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(affine_grid_grad, ops::AffineGridOpGrad);
REGISTER_OP_CPU_KERNEL(
affine_grid,
ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
affine_grid_grad,
ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(affine_grid,
ops::AffineGridOpKernel<phi::CPUContext, float>,
ops::AffineGridOpKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(affine_grid_grad,
ops::AffineGridGradOpKernel<phi::CPUContext, float>,
ops::AffineGridGradOpKernel<phi::CPUContext, double>);
REGISTER_OP_VERSION(affine_grid)
.AddCheckpoint(
......
......@@ -84,7 +84,7 @@ class AllcloseOpVarTypeInference : public framework::VarTypeInference {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
DECLARE_INFER_SHAPE_FUNCTOR(allclose,
AllcloseInferShapeFunctor,
......
......@@ -65,7 +65,7 @@ class AllocFloatStatusKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
alloc_float_status,
......
......@@ -95,7 +95,7 @@ template <typename T>
class CheckFiniteAndUnscaleCpuKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
const auto xs = ctx.MultiInput<framework::Tensor>("X");
const auto* scale = ctx.Input<framework::Tensor>("Scale");
auto outs = ctx.MultiOutput<framework::Tensor>("Out");
......@@ -106,11 +106,10 @@ class CheckFiniteAndUnscaleCpuKernel : public framework::OpKernel<T> {
*found_inf_data = false;
framework::Tensor is_finite =
ctx.AllocateTmpTensor<bool, platform::CPUDeviceContext>({1}, dev_ctx);
ctx.AllocateTmpTensor<bool, phi::CPUContext>({1}, dev_ctx);
bool* is_finite_data = is_finite.template data<bool>();
auto& dev = *ctx.template device_context<platform::CPUDeviceContext>()
.eigen_device();
auto& dev = *ctx.template device_context<phi::CPUContext>().eigen_device();
T inverse_scale = Inverse<T>(*scale_data);
for (size_t i = 0; i < xs.size(); ++i) {
......
......@@ -68,7 +68,7 @@ class ClearFloatStatusKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
clear_float_status,
......
......@@ -67,7 +67,7 @@ class GetFloatStatusKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
get_float_status,
......
......@@ -169,9 +169,9 @@ decr_every_n_nan_or_inf steps and each step some gradients are infinite.
};
template <typename T, bool IsFoundInfOnCPU>
class UpdateLossScalingFunctor<platform::CPUDeviceContext, T, IsFoundInfOnCPU> {
class UpdateLossScalingFunctor<phi::CPUContext, T, IsFoundInfOnCPU> {
public:
void operator()(const platform::CPUDeviceContext& ctx,
void operator()(const phi::CPUContext& ctx,
const bool* found_inf_data,
const T* pre_loss_scaling_data,
const int* good_in_data,
......@@ -203,9 +203,9 @@ class UpdateLossScalingFunctor<platform::CPUDeviceContext, T, IsFoundInfOnCPU> {
};
template <typename T>
class LazyZeros<platform::CPUDeviceContext, T> {
class LazyZeros<phi::CPUContext, T> {
public:
void operator()(const platform::CPUDeviceContext& dev_ctx,
void operator()(const phi::CPUContext& dev_ctx,
const bool* found_inf_data,
const std::vector<const framework::Tensor*>& xs,
const std::vector<framework::Tensor*>& outs) const {
......@@ -225,7 +225,7 @@ class LazyZeros<platform::CPUDeviceContext, T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
update_loss_scaling,
......
......@@ -116,20 +116,16 @@ REGISTER_OPERATOR(angle,
REGISTER_OP_CPU_KERNEL(
angle,
ops::AngleKernel<paddle::platform::CPUDeviceContext, float>,
ops::AngleKernel<paddle::platform::CPUDeviceContext, double>,
ops::AngleKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::AngleKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
ops::AngleKernel<phi::CPUContext, float>,
ops::AngleKernel<phi::CPUContext, double>,
ops::AngleKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::AngleKernel<phi::CPUContext, paddle::platform::complex<double>>);
REGISTER_OPERATOR(angle_grad, ops::AngleGradOp);
REGISTER_OP_CPU_KERNEL(
angle_grad,
ops::AngleGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::AngleGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::AngleGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::AngleGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<double>>);
ops::AngleGradKernel<phi::CPUContext, float>,
ops::AngleGradKernel<phi::CPUContext, double>,
ops::AngleGradKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::AngleGradKernel<phi::CPUContext, paddle::platform::complex<double>>);
......@@ -51,7 +51,7 @@ struct ArrayToLoDFunctor : public boost::static_visitor<void> {
void operator()(Place place) const {
auto &pool = platform::DeviceContextPool::Instance();
if (std::is_same<Place, platform::CPUPlace>::value) {
Apply(static_cast<platform::CPUDeviceContext *>(pool.Get(place)));
Apply(static_cast<phi::CPUContext *>(pool.Get(place)));
} else {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Apply(static_cast<platform::CUDADeviceContext *>(pool.Get(place)));
......
......@@ -22,7 +22,7 @@ limitations under the License. */
TEST(AssignOp, AssignLoDTensor) {
paddle::platform::CPUPlace cpu_place;
paddle::platform::CPUDeviceContext ctx(cpu_place);
phi::CPUContext ctx(cpu_place);
paddle::framework::Variable output;
paddle::operators::AssignFunctor assign_functor(&output, ctx);
......@@ -47,7 +47,7 @@ TEST(AssignOp, AssignLoDTensor) {
TEST(AssignOp, AssignLoDTensorArray) {
paddle::platform::CPUPlace cpu_place;
paddle::platform::CPUDeviceContext ctx(cpu_place);
phi::CPUContext ctx(cpu_place);
paddle::framework::Variable output;
paddle::operators::AssignFunctor assign_functor(&output, ctx);
......@@ -78,7 +78,7 @@ TEST(AssignOp, AssignLoDTensorArray) {
TEST(AssignOp, AssignSelectedRows) {
paddle::platform::CPUPlace cpu_place;
paddle::platform::CPUDeviceContext ctx(cpu_place);
phi::CPUContext ctx(cpu_place);
paddle::framework::Variable output;
paddle::operators::AssignFunctor assign_functor(&output, ctx);
......
......@@ -337,7 +337,7 @@ template <typename T>
class AttentionLSTMKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
using DeviceContext = paddle::platform::CPUDeviceContext;
using DeviceContext = phi::CPUContext;
auto* x = ctx.Input<LoDTensor>("X");
auto* h0 = ctx.Input<Tensor>("H0");
......@@ -416,10 +416,10 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
T* lstm_x_data = lstm_x->mutable_data<T>(ctx.GetPlace());
T* lstm_out_data = lstm_out->mutable_data<T>(ctx.GetPlace());
auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, T>(ctx);
auto blas = phi::funcs::GetBlas<phi::CPUContext, T>(ctx);
// x(TxM) * fc (Mx1) part of atten_wgt(M+D)x1
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
phi::funcs::FCFunctor<DeviceContext, T> fc;
fc(dev_ctx,
total_T,
......
......@@ -18,11 +18,10 @@ namespace paddle {
namespace operators {
template <>
void GetAccumulators<paddle::platform::CPUDeviceContext>(
const framework::ExecutionContext& ctx,
int64_t* num_updates,
int64_t* num_accumulates,
int64_t* old_num_accumulates) {
void GetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
int64_t* num_updates,
int64_t* num_accumulates,
int64_t* old_num_accumulates) {
auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
......@@ -33,11 +32,10 @@ void GetAccumulators<paddle::platform::CPUDeviceContext>(
}
template <>
void SetAccumulators<paddle::platform::CPUDeviceContext>(
const framework::ExecutionContext& ctx,
int64_t num_updates,
int64_t num_accumulates,
int64_t old_num_accumulates) {
void SetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
int64_t num_updates,
int64_t num_accumulates,
int64_t old_num_accumulates) {
auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
......@@ -217,7 +215,6 @@ REGISTER_OPERATOR(
ops::AverageAccumulatesOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
average_accumulates,
ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, float>,
ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(average_accumulates,
ops::AverageAccumulatesKernel<phi::CPUContext, float>,
ops::AverageAccumulatesKernel<phi::CPUContext, double>);
......@@ -166,7 +166,6 @@ REGISTER_OPERATOR(batch_fc_grad,
ops::BatchFCGradOp,
ops::BatchFCGradOpNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(
batch_fc,
ops::BatchFCKernel<paddle::platform::CPUDeviceContext, float>,
ops::BatchFCKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(batch_fc,
ops::BatchFCKernel<phi::CPUContext, float>,
ops::BatchFCKernel<phi::CPUContext, double>);
......@@ -141,7 +141,7 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
auto cpu_place = std::unique_ptr<paddle::platform::CPUPlace>(
new paddle::platform::CPUPlace());
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
phi::CPUContext cpu_ctx(*cpu_place);
framework::LoD lod;
lod.push_back(source_level_lod);
......
......@@ -143,9 +143,8 @@ REGISTER_OPERATOR(beam_search,
ops::BeamSearchOp,
ops::BeamSearchOpMaker,
ops::BeamSearchInferVarType);
REGISTER_OP_CPU_KERNEL(
beam_search,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, double>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::BeamSearchOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(beam_search,
ops::BeamSearchOpKernel<phi::CPUContext, float>,
ops::BeamSearchOpKernel<phi::CPUContext, double>,
ops::BeamSearchOpKernel<phi::CPUContext, int>,
ops::BeamSearchOpKernel<phi::CPUContext, int64_t>);
......@@ -172,11 +172,9 @@ REGISTER_OPERATOR(bmm,
ops::BmmOpGradMaker<paddle::framework::OpDesc>,
ops::BmmOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(bmm_grad, ops::BmmOpGrad);
REGISTER_OP_CPU_KERNEL(
bmm,
ops::BmmKernel<paddle::platform::CPUDeviceContext, float>,
ops::BmmKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
bmm_grad,
ops::BmmGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::BmmGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(bmm,
ops::BmmKernel<phi::CPUContext, float>,
ops::BmmKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(bmm_grad,
ops::BmmGradKernel<phi::CPUContext, float>,
ops::BmmGradKernel<phi::CPUContext, double>);
......@@ -176,7 +176,7 @@ class BprLossGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext;
using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(bpr_loss,
ops::BprLossOp,
......
......@@ -141,7 +141,7 @@ class CastOp : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
// cast use phi kernel, so no need to REGISTER_OP_CPU_KERNEL here.
REGISTER_OPERATOR(cast,
......
......@@ -146,7 +146,7 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(CenterLossGradNoNeedBufVarsInferer, "X");
} // namespace paddle
namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext;
using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(center_loss,
ops::CenterLossOp,
......
......@@ -113,13 +113,11 @@ It accomplishes the execution of the instruction according to the following step
} // namespace paddle::operators
namespace ops = paddle::operators;
using CPUDeviceContext = paddle::platform::CPUDeviceContext;
REGISTER_OPERATOR(
cinn_instruction_run,
ops::CinnInstructionRunOp,
ops::CinnInstructionRunOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
cinn_instruction_run,
ops::CinnInstructionRunOpKernel<CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(cinn_instruction_run,
ops::CinnInstructionRunOpKernel<phi::CPUContext, float>);
......@@ -189,6 +189,5 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
/* see [Why use single type kernel] */
REGISTER_OP_CPU_KERNEL(
cinn_launch,
ops::CinnLaunchOpKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(cinn_launch,
ops::CinnLaunchOpKernel<phi::CPUContext, float>);
......@@ -19,6 +19,5 @@ REGISTER_OP_WITHOUT_GRADIENT(clip_by_norm,
ops::ClipByNormOp,
ops::ClipByNormOpMaker);
REGISTER_OP_CPU_KERNEL(
clip_by_norm,
ops::ClipByNormKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(clip_by_norm,
ops::ClipByNormKernel<phi::CPUContext, float>);
......@@ -511,11 +511,10 @@ REGISTER_OPERATOR(coalesce_tensor,
paddle::operators::CoalesceTensorOpMaker);
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL(
coalesce_tensor,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(coalesce_tensor,
ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<phi::CPUContext, float>,
ops::CoalesceTensorOpKernel<phi::CPUContext, double>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL(
......@@ -550,20 +549,18 @@ REGISTER_OP_XPU_KERNEL(
#if defined(PADDLE_WITH_ASCEND_CL)
REGISTER_OP_NPU_KERNEL(
coalesce_tensor,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext,
plat::float16>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<phi::CPUContext, float>,
ops::CoalesceTensorOpKernel<phi::CPUContext, plat::float16>,
ops::CoalesceTensorOpKernel<phi::CPUContext, double>);
#endif
#if defined(PADDLE_WITH_MLU)
REGISTER_OP_MLU_KERNEL(
coalesce_tensor,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext,
plat::float16>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>);
ops::CoalesceTensorOpKernel<phi::CPUContext, plat::float16>,
ops::CoalesceTensorOpKernel<phi::CPUContext, int>,
ops::CoalesceTensorOpKernel<phi::CPUContext, float>);
#endif
REGISTER_OP_VERSION(coalesce_tensor)
......
......@@ -73,10 +73,9 @@ REGISTER_OP_WITHOUT_GRADIENT(allreduce,
ops::AllReduceOp,
ops::AllReduceOpMaker);
REGISTER_OP_CPU_KERNEL(
allreduce,
ops::AllReduceOpKernel<plat::CPUDeviceContext, float>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, double>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, int>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, int64_t>,
ops::AllReduceOpKernel<plat::CPUDeviceContext, plat::float16>);
REGISTER_OP_CPU_KERNEL(allreduce,
ops::AllReduceOpKernel<phi::CPUContext, float>,
ops::AllReduceOpKernel<phi::CPUContext, double>,
ops::AllReduceOpKernel<phi::CPUContext, int>,
ops::AllReduceOpKernel<phi::CPUContext, int64_t>,
ops::AllReduceOpKernel<phi::CPUContext, plat::float16>);
......@@ -143,12 +143,10 @@ REGISTER_OPERATOR(complex,
REGISTER_OPERATOR(complex_grad, ops::ComplexGradOp);
REGISTER_OP_CPU_KERNEL(
complex,
ops::ComplexKernel<paddle::platform::CPUDeviceContext, float>,
ops::ComplexKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
complex_grad,
ops::ComplexGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ComplexGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(complex,
ops::ComplexKernel<phi::CPUContext, float>,
ops::ComplexKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(complex_grad,
ops::ComplexGradKernel<phi::CPUContext, float>,
ops::ComplexGradKernel<phi::CPUContext, double>);
......@@ -161,12 +161,10 @@ REGISTER_OPERATOR(as_real,
ops::AsRealGradMaker<paddle::framework::OpDesc>,
ops::AsRealGradMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
as_complex,
ops::AsComplexKernel<paddle::platform::CPUDeviceContext, float>,
ops::AsComplexKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
as_real,
ops::AsRealKernel<paddle::platform::CPUDeviceContext, float>,
ops::AsRealKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(as_complex,
ops::AsComplexKernel<phi::CPUContext, float>,
ops::AsComplexKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(as_real,
ops::AsRealKernel<phi::CPUContext, float>,
ops::AsRealKernel<phi::CPUContext, double>);
......@@ -249,8 +249,6 @@ REGISTER_OPERATOR(cos_sim,
ops::CosSimGradOpMaker<paddle::framework::OpDesc>,
ops::CosSimGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(cos_sim_grad, ops::CosSimOpGrad);
REGISTER_OP_CPU_KERNEL(
cos_sim, ops::CosSimKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
cos_sim_grad,
ops::CosSimGradKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(cos_sim, ops::CosSimKernel<phi::CPUContext, float>);
REGISTER_OP_CPU_KERNEL(cos_sim_grad,
ops::CosSimGradKernel<phi::CPUContext, float>);
......@@ -215,7 +215,6 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(crf_decoding,
ops::CRFDecodingOp,
ops::CRFDecodingOpMaker);
REGISTER_OP_CPU_KERNEL(
crf_decoding,
ops::CRFDecodingOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::CRFDecodingOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(crf_decoding,
ops::CRFDecodingOpKernel<phi::CPUContext, float>,
ops::CRFDecodingOpKernel<phi::CPUContext, double>);
......@@ -223,14 +223,12 @@ REGISTER_OPERATOR(crop,
ops::CropGradOpMaker<paddle::imperative::OpBase>,
ops::GropNoNeedBufferVarInferer);
REGISTER_OPERATOR(crop_grad, ops::CropOpGrad);
REGISTER_OP_CPU_KERNEL(
crop,
ops::CropKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
crop_grad,
ops::CropGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(crop,
ops::CropKernel<phi::CPUContext, float>,
ops::CropKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(crop_grad,
ops::CropGradKernel<phi::CPUContext, float>,
ops::CropGradKernel<phi::CPUContext, double>);
REGISTER_OP_CUDA_KERNEL(
crop,
......
......@@ -320,18 +320,16 @@ REGISTER_OPERATOR(crop_tensor,
ops::CropTensorGradOpMaker<paddle::framework::OpDesc>,
ops::CropTensorGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(crop_tensor_grad, ops::CropTensorOpGrad);
REGISTER_OP_CPU_KERNEL(
crop_tensor,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, double>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, int>,
ops::CropTensorKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
crop_tensor_grad,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(crop_tensor,
ops::CropTensorKernel<phi::CPUContext, float>,
ops::CropTensorKernel<phi::CPUContext, double>,
ops::CropTensorKernel<phi::CPUContext, int>,
ops::CropTensorKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CPU_KERNEL(crop_tensor_grad,
ops::CropTensorGradKernel<phi::CPUContext, float>,
ops::CropTensorGradKernel<phi::CPUContext, double>,
ops::CropTensorGradKernel<phi::CPUContext, int>,
ops::CropTensorGradKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
crop_tensor,
......
......@@ -421,7 +421,7 @@ class CrossEntropyGradOpMaker2 : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPUCtx = paddle::platform::CPUDeviceContext;
using CPUCtx = phi::CPUContext;
REGISTER_OPERATOR(cross_entropy,
ops::CrossEntropyOpBase,
......
......@@ -129,7 +129,6 @@ REGISTER_OPERATOR(
ops::CTCAlignOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
ctc_align,
ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int>,
ops::CTCAlignKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(ctc_align,
ops::CTCAlignKernel<phi::CPUContext, int>,
ops::CTCAlignKernel<phi::CPUContext, int64_t>);
......@@ -145,7 +145,7 @@ class LogcumsumexpGradMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
DECLARE_INFER_SHAPE_FUNCTOR(cumsum,
CumsumInferShapeFunctor,
PD_INFER_META(phi::CumInferMeta));
......
......@@ -287,8 +287,7 @@ The required data format for this layer is one of the following:
};
template <typename T>
class DataNormKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
class DataNormKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
// const bool is_test = ctx.Attr<bool>("is_test");
......@@ -533,8 +532,7 @@ class DataNormGradOp : public framework::OperatorWithKernel {
};
template <typename T>
class DataNormGradKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
class DataNormGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
const auto *x = ctx.Input<Tensor>("X");
......@@ -788,14 +786,12 @@ REGISTER_OPERATOR(data_norm,
ops::DataNormGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(data_norm_grad, ops::DataNormGradOp);
REGISTER_OP_CPU_KERNEL(
data_norm,
ops::DataNormKernel<paddle::platform::CPUDeviceContext, float>,
ops::DataNormKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
data_norm_grad,
ops::DataNormGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::DataNormGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(data_norm,
ops::DataNormKernel<phi::CPUContext, float>,
ops::DataNormKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(data_norm_grad,
ops::DataNormGradKernel<phi::CPUContext, float>,
ops::DataNormGradKernel<phi::CPUContext, double>);
REGISTER_OP_VERSION(data_norm).AddCheckpoint(
R"ROC(
upgrad data_norm op by adding scale_w to support scale and shift.)ROC",
......
......@@ -349,7 +349,7 @@ class DeformablePSROIPoolGradOp : public framework::OperatorWithKernel {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
deformable_psroi_pooling,
ops::DeformablePSROIPoolOp,
......
......@@ -33,8 +33,8 @@ namespace paddle {
namespace operators {
template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx,
struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in,
const framework::Tensor* scale,
float max_range,
......@@ -49,8 +49,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
}
};
template struct DequantizeFunctor<platform::CPUDeviceContext, int8_t>;
template struct DequantizeFunctor<platform::CPUDeviceContext, int16_t>;
template struct DequantizeFunctor<phi::CPUContext, int8_t>;
template struct DequantizeFunctor<phi::CPUContext, int16_t>;
class DequantizeMaxAbsOp : public framework::OperatorWithKernel {
public:
......@@ -102,7 +102,7 @@ $$Out = \frac{scale*X}{ max\_range }$$
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
dequantize_abs_max,
......
......@@ -32,8 +32,8 @@ namespace paddle {
namespace operators {
template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx,
struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in,
const framework::Tensor* dict,
framework::Tensor* out) {
......@@ -51,7 +51,7 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
}
};
template struct DequantizeFunctor<platform::CPUDeviceContext, int8_t>;
template struct DequantizeFunctor<phi::CPUContext, int8_t>;
class DequantizeLogOp : public framework::OperatorWithKernel {
public:
......@@ -108,7 +108,7 @@ This calculation is an opposite operation of QuantizeLogOp:
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
dequantize_log,
......
......@@ -200,7 +200,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
auto* match_indices = context.Output<Tensor>("ColToRowMatchIndices");
auto* match_dist = context.Output<Tensor>("ColToRowMatchDist");
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.device_context<phi::CPUContext>();
auto col = dist_mat->dims()[1];
......@@ -216,9 +216,9 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
match_indices->mutable_data<int>({n, col}, context.GetPlace());
match_dist->mutable_data<T>({n, col}, context.GetPlace());
phi::funcs::SetConstant<platform::CPUDeviceContext, int> iset;
phi::funcs::SetConstant<phi::CPUContext, int> iset;
iset(dev_ctx, match_indices, static_cast<int>(-1));
phi::funcs::SetConstant<platform::CPUDeviceContext, T> tset;
phi::funcs::SetConstant<phi::CPUContext, T> tset;
tset(dev_ctx, match_dist, static_cast<T>(0));
int* indices = match_indices->data<int>();
......
......@@ -104,7 +104,6 @@ REGISTER_OPERATOR(
ops::BoxClipOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
box_clip,
ops::BoxClipKernel<paddle::platform::CPUDeviceContext, float>,
ops::BoxClipKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(box_clip,
ops::BoxClipKernel<phi::CPUContext, float>,
ops::BoxClipKernel<phi::CPUContext, double>);
......@@ -29,8 +29,7 @@ class BoxClipKernel : public framework::OpKernel<T> {
auto* input_box = context.Input<LoDTensor>("Input");
auto* im_info = context.Input<LoDTensor>("ImInfo");
auto* output_box = context.Output<LoDTensor>("Output");
auto& dev_ctx =
context.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.template device_context<phi::CPUContext>();
output_box->mutable_data<T>(context.GetPlace());
if (input_box->lod().size()) {
PADDLE_ENFORCE_EQ(input_box->lod().size(),
......
......@@ -251,7 +251,6 @@ REGISTER_OPERATOR(
ops::BoxCoderOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
box_coder,
ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, float>,
ops::BoxCoderKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(box_coder,
ops::BoxCoderKernel<phi::CPUContext, float>,
ops::BoxCoderKernel<phi::CPUContext, double>);
......@@ -227,7 +227,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
box_decoder_and_assign,
ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, float>,
ops::BoxDecoderAndAssignKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(box_decoder_and_assign,
ops::BoxDecoderAndAssignKernel<phi::CPUContext, float>,
ops::BoxDecoderAndAssignKernel<phi::CPUContext, double>);
......@@ -122,7 +122,7 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel {
* to encode class specific mask targets.
*/
template <typename T>
static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx,
static inline void ExpandMaskTarget(const phi::CPUContext& ctx,
const Tensor& masks,
const Tensor& mask_class_labels,
const int resolution,
......@@ -150,7 +150,7 @@ static inline void ExpandMaskTarget(const platform::CPUDeviceContext& ctx,
}
template <typename T>
std::vector<Tensor> SampleMaskForOneImage(const platform::CPUDeviceContext& ctx,
std::vector<Tensor> SampleMaskForOneImage(const phi::CPUContext& ctx,
const Tensor& im_info,
const Tensor& gt_classes,
const Tensor& is_crowd,
......@@ -391,7 +391,7 @@ class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
std::vector<size_t> lod0(1, 0);
int64_t num_mask = 0;
auto& dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.device_context<phi::CPUContext>();
auto gt_classes_lod = gt_classes->lod().back();
auto is_crowd_lod = is_crowd->lod().back();
......
......@@ -168,7 +168,7 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel {
};
template <typename T>
void Concat(const platform::CPUDeviceContext& context,
void Concat(const phi::CPUContext& context,
const Tensor& in_tensor_a,
const Tensor& in_tensor_b,
Tensor* out_tensor) {
......@@ -176,24 +176,23 @@ void Concat(const platform::CPUDeviceContext& context,
std::vector<Tensor> inputs;
inputs.emplace_back(in_tensor_a);
inputs.emplace_back(in_tensor_b);
math::ConcatFunctor<platform::CPUDeviceContext, T> concat_functor;
math::ConcatFunctor<phi::CPUContext, T> concat_functor;
concat_functor(context, inputs, axis, out_tensor);
}
template <typename T>
std::vector<std::vector<int>> SampleFgBgGt(
const platform::CPUDeviceContext& context,
Tensor* iou,
const Tensor& is_crowd,
const int batch_size_per_im,
const float fg_fraction,
const float fg_thresh,
const float bg_thresh_hi,
const float bg_thresh_lo,
std::minstd_rand engine,
const bool use_random,
const bool is_cascade_rcnn,
const Tensor& rpn_rois) {
std::vector<std::vector<int>> SampleFgBgGt(const phi::CPUContext& context,
Tensor* iou,
const Tensor& is_crowd,
const int batch_size_per_im,
const float fg_fraction,
const float fg_thresh,
const float bg_thresh_hi,
const float bg_thresh_lo,
std::minstd_rand engine,
const bool use_random,
const bool is_cascade_rcnn,
const Tensor& rpn_rois) {
std::vector<int> fg_inds;
std::vector<int> bg_inds;
std::vector<int> mapped_gt_inds;
......@@ -286,7 +285,7 @@ std::vector<std::vector<int>> SampleFgBgGt(
}
template <typename T>
void GatherBoxesLabels(const platform::CPUDeviceContext& context,
void GatherBoxesLabels(const phi::CPUContext& context,
const Tensor& boxes,
const Tensor& max_overlap,
const Tensor& gt_boxes,
......@@ -335,7 +334,7 @@ void GatherBoxesLabels(const platform::CPUDeviceContext& context,
template <typename T>
std::vector<Tensor> SampleRoisForOneImage(
const platform::CPUDeviceContext& context,
const phi::CPUContext& context,
const Tensor& rpn_rois_in,
const Tensor& gt_classes,
const Tensor& is_crowd,
......@@ -372,7 +371,7 @@ std::vector<Tensor> SampleRoisForOneImage(
Tensor roi_filter;
// Tensor box_filter;
if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero;
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
roi_filter.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
set_zero(context, &roi_filter, static_cast<T>(0));
} else {
......@@ -597,7 +596,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
std::vector<size_t> lod0(1, 0);
int64_t num_rois = 0;
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.device_context<phi::CPUContext>();
auto rpn_rois_lod = rpn_rois->lod().back();
auto gt_classes_lod = gt_classes->lod().back();
......
......@@ -98,8 +98,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
float min_size = context.Attr<float>("min_size");
float eta = context.Attr<float>("eta");
auto &dev_ctx =
context.template device_context<platform::CPUDeviceContext>();
auto &dev_ctx = context.template device_context<phi::CPUContext>();
auto &scores_dim = scores->dims();
int64_t num = scores_dim[0];
......@@ -122,7 +121,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
dev_ctx.GetPlace());
phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans;
phi::funcs::Transpose<phi::CPUContext, T, 4> trans;
std::vector<int> axis = {0, 2, 3, 1};
trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
trans(dev_ctx, *scores, &scores_swap, axis);
......@@ -181,7 +180,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
}
std::pair<Tensor, Tensor> ProposalForOneImage(
const platform::CPUDeviceContext &ctx,
const phi::CPUContext &ctx,
const Tensor &im_info_slice,
const Tensor &anchors,
const Tensor &variances,
......@@ -234,7 +233,7 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
FilterBoxes<T>(ctx, &proposals, min_size, im_info_slice, true, &keep);
// Handle the case when there is no keep index left
if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero;
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter;
......
......@@ -99,8 +99,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
float eta = context.Attr<float>("eta");
bool pixel_offset = context.Attr<bool>("pixel_offset");
auto &dev_ctx =
context.template device_context<platform::CPUDeviceContext>();
auto &dev_ctx = context.template device_context<phi::CPUContext>();
auto &scores_dim = scores->dims();
int64_t num = scores_dim[0];
......@@ -123,7 +122,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
scores_swap.mutable_data<T>({num, h_score, w_score, c_score},
dev_ctx.GetPlace());
phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans;
phi::funcs::Transpose<phi::CPUContext, T, 4> trans;
std::vector<int> axis = {0, 2, 3, 1};
trans(dev_ctx, *bbox_deltas, &bbox_deltas_swap, axis);
trans(dev_ctx, *scores, &scores_swap, axis);
......@@ -183,7 +182,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
}
std::pair<Tensor, Tensor> ProposalForOneImage(
const platform::CPUDeviceContext &ctx,
const phi::CPUContext &ctx,
const Tensor &im_shape_slice,
const Tensor &anchors,
const Tensor &variances,
......@@ -240,7 +239,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
ctx, &proposals, min_size, im_shape_slice, false, &keep, pixel_offset);
// Handle the case when there is no keep index left
if (keep.numel() == 0) {
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero;
phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
bbox_sel.mutable_data<T>({1, 4}, ctx.GetPlace());
set_zero(ctx, &bbox_sel, static_cast<T>(0));
Tensor scores_filter;
......
......@@ -113,7 +113,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
iou_similarity,
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, float>,
ops::IOUSimilarityKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(iou_similarity,
ops::IOUSimilarityKernel<phi::CPUContext, float>,
ops::IOUSimilarityKernel<phi::CPUContext, double>);
......@@ -356,7 +356,7 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
auto* outs = ctx.Output<LoDTensor>("Out");
auto& score_dims = scores_input->dims();
auto score_size = score_dims.size();
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
LoDTensor scores;
LoDTensor boxes;
......
......@@ -403,7 +403,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
mine_hard_examples,
ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(mine_hard_examples,
ops::MineHardExamplesKernel<phi::CPUContext, float>,
ops::MineHardExamplesKernel<phi::CPUContext, double>);
......@@ -219,7 +219,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
T nms_threshold = static_cast<T>(ctx.Attr<float>("nms_threshold"));
T nms_eta = static_cast<T>(ctx.Attr<float>("nms_eta"));
T score_threshold = static_cast<T>(ctx.Attr<float>("score_threshold"));
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
int num_det = 0;
......@@ -361,7 +361,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
auto rois_num = ctx.Input<Tensor>("RoisNum");
auto score_dims = scores->dims();
auto score_size = score_dims.size();
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
std::vector<std::map<int, std::vector<int>>> all_indices;
std::vector<size_t> batch_starts = {0};
......
......@@ -507,7 +507,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
int64_t box_dim = box_dims[2];
int64_t out_dim = box_dim + 2;
auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
std::vector<std::vector<std::vector<T>>> all_nmsed_out;
std::vector<size_t> batch_starts = {0};
......
......@@ -112,12 +112,11 @@ void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) {
}
template <typename T>
std::vector<Tensor> FilterStraddleAnchor(
const platform::CPUDeviceContext& context,
const Tensor* anchor,
const float rpn_straddle_thresh,
T im_height,
T im_width) {
std::vector<Tensor> FilterStraddleAnchor(const phi::CPUContext& context,
const Tensor* anchor,
const float rpn_straddle_thresh,
T im_height,
T im_width) {
std::vector<int> inds_inside;
int anchor_num = anchor->dims()[0];
auto* anchor_data = anchor->data<T>();
......@@ -154,7 +153,7 @@ std::vector<Tensor> FilterStraddleAnchor(
}
template <typename T>
Tensor FilterCrowdGt(const platform::CPUDeviceContext& context,
Tensor FilterCrowdGt(const phi::CPUContext& context,
Tensor* gt_boxes,
Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
......@@ -300,7 +299,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data,
}
template <typename T>
std::vector<Tensor> SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx,
std::vector<Tensor> SampleRpnFgBgGt(const phi::CPUContext& ctx,
const Tensor& anchor_by_gt_overlap,
const int rpn_batch_size_per_im,
const float rpn_positive_overlap,
......@@ -437,7 +436,7 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
tgt_bbox->mutable_data<T>({max_num, 4}, place);
tgt_lbl->mutable_data<int>({max_num, 1}, place);
bbox_inside_weight->mutable_data<T>({max_num, 4}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.device_context<phi::CPUContext>();
std::random_device rnd;
std::minstd_rand engine;
......@@ -857,11 +856,10 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel {
};
template <typename T>
std::vector<Tensor> FilterCrowdGtBoxLabel(
const platform::CPUDeviceContext& context,
Tensor* gt_boxes,
Tensor* gt_labels,
Tensor* is_crowd) {
std::vector<Tensor> FilterCrowdGtBoxLabel(const phi::CPUContext& context,
Tensor* gt_boxes,
Tensor* gt_labels,
Tensor* is_crowd) {
int gt_num = gt_boxes->dims()[0];
std::vector<int> not_crowd_inds;
auto* is_crowd_data = is_crowd->data<int>();
......@@ -893,7 +891,7 @@ std::vector<Tensor> FilterCrowdGtBoxLabel(
}
template <typename T>
std::vector<Tensor> GetAllFgBgGt(const platform::CPUDeviceContext& ctx,
std::vector<Tensor> GetAllFgBgGt(const phi::CPUContext& ctx,
const Tensor& anchor_by_gt_overlap,
const Tensor& ncrowd_gt_labels,
const float positive_overlap,
......@@ -1044,7 +1042,7 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
tgt_lbl->mutable_data<int>({max_num, 1}, place);
bbox_inside_weight->mutable_data<T>({max_num, 4}, place);
fg_num->mutable_data<int>({batch_num, 1}, place);
auto& dev_ctx = context.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.device_context<phi::CPUContext>();
std::random_device rnd;
std::minstd_rand engine;
......
......@@ -266,12 +266,10 @@ REGISTER_OPERATOR(sigmoid_focal_loss,
ops::SigmoidFocalLossGradOpMaker<paddle::framework::OpDesc>,
ops::SigmoidFocalLossGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(sigmoid_focal_loss_grad, ops::SigmoidFocalLossGradOp);
REGISTER_OP_CPU_KERNEL(
sigmoid_focal_loss,
ops::SigmoidFocalLossKernel<paddle::platform::CPUDeviceContext, float>,
ops::SigmoidFocalLossKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(sigmoid_focal_loss,
ops::SigmoidFocalLossKernel<phi::CPUContext, float>,
ops::SigmoidFocalLossKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(
sigmoid_focal_loss_grad,
ops::SigmoidFocalLossGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SigmoidFocalLossGradKernel<paddle::platform::CPUDeviceContext,
double>);
ops::SigmoidFocalLossGradKernel<phi::CPUContext, float>,
ops::SigmoidFocalLossGradKernel<phi::CPUContext, double>);
......@@ -149,8 +149,8 @@ for i-th instance and each `id` of NegIndices in this instance:
};
template <typename T, typename WT>
struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> {
void operator()(const platform::CPUDeviceContext& ctx,
struct NegTargetAssignFunctor<phi::CPUContext, T, WT> {
void operator()(const phi::CPUContext& ctx,
const int* neg_indices,
const size_t* lod,
const int N,
......@@ -172,10 +172,8 @@ struct NegTargetAssignFunctor<platform::CPUDeviceContext, T, WT> {
}
};
template struct NegTargetAssignFunctor<platform::CPUDeviceContext, int, float>;
template struct NegTargetAssignFunctor<platform::CPUDeviceContext,
float,
float>;
template struct NegTargetAssignFunctor<phi::CPUContext, int, float>;
template struct NegTargetAssignFunctor<phi::CPUContext, float, float>;
} // namespace operators
} // namespace paddle
......@@ -187,7 +185,6 @@ REGISTER_OPERATOR(
ops::TargetAssignOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
target_assign,
ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, int, float>,
ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, float, float>);
REGISTER_OP_CPU_KERNEL(target_assign,
ops::TargetAssignKernel<phi::CPUContext, int, float>,
ops::TargetAssignKernel<phi::CPUContext, float, float>);
......@@ -179,12 +179,10 @@ REGISTER_OPERATOR(slogdeterminant,
REGISTER_OPERATOR(slogdeterminant_grad,
ops::SlogDeterminantGradOp) // reuse det grad op
REGISTER_OP_CPU_KERNEL(
slogdeterminant,
ops::SlogDeterminantKernel<plat::CPUDeviceContext, float>,
ops::SlogDeterminantKernel<plat::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
slogdeterminant_grad,
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, float>,
ops::SlogDeterminantGradKernel<plat::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(slogdeterminant,
ops::SlogDeterminantKernel<phi::CPUContext, float>,
ops::SlogDeterminantKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(slogdeterminant_grad,
ops::SlogDeterminantGradKernel<phi::CPUContext, float>,
ops::SlogDeterminantGradKernel<phi::CPUContext, double>);
......@@ -66,6 +66,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dgc_clip_by_norm,
ops::DGCClipByNormOp,
ops::DGCClipByNormOpMaker);
REGISTER_OP_CPU_KERNEL(
dgc_clip_by_norm,
ops::DGCClipByNormKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(dgc_clip_by_norm,
ops::DGCClipByNormKernel<phi::CPUContext, float>);
......@@ -138,9 +138,8 @@ REGISTER_OPERATOR(
ops::DiagEmbedOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
diag_embed,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, int>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, float>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, double>,
ops::DiagEmbedKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(diag_embed,
ops::DiagEmbedKernel<phi::CPUContext, int>,
ops::DiagEmbedKernel<phi::CPUContext, float>,
ops::DiagEmbedKernel<phi::CPUContext, double>,
ops::DiagEmbedKernel<phi::CPUContext, int64_t>);
......@@ -59,9 +59,8 @@ REGISTER_OPERATOR(
ops::DiagOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
diag,
ops::DiagKernel<paddle::platform::CPUDeviceContext, int>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, float>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, double>,
ops::DiagKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(diag,
ops::DiagKernel<phi::CPUContext, int>,
ops::DiagKernel<phi::CPUContext, float>,
ops::DiagKernel<phi::CPUContext, double>,
ops::DiagKernel<phi::CPUContext, int64_t>);
......@@ -42,11 +42,11 @@ struct GammaCPUFunctor {
};
template <typename T>
struct DirichletSampler<platform::CPUDeviceContext, T> {
struct DirichletSampler<phi::CPUContext, T> {
void operator()(const framework::ExecutionContext& ctx,
const Tensor* alpha,
Tensor* out) {
auto& dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
auto& dev_ctx = ctx.device_context<phi::CPUContext>();
auto p_gen = framework::DefaultCPUGenerator();
auto generator = p_gen->GetCPUEngine();
......@@ -71,8 +71,7 @@ struct DirichletSampler<platform::CPUDeviceContext, T> {
gamma_samples.data<T>(),
standard_uniform,
standard_normal);
platform::ForRange<platform::CPUDeviceContext> for_range(dev_ctx,
alpha->numel());
platform::ForRange<phi::CPUContext> for_range(dev_ctx, alpha->numel());
for_range(gamma_functor);
// normalize them into a simplex, along the last axis
......@@ -81,10 +80,10 @@ struct DirichletSampler<platform::CPUDeviceContext, T> {
new_shape[new_shape.size() - 1] = 1;
gamma_sum.mutable_data<T>(new_shape, dev_ctx.GetPlace());
ReduceKernelFunctor<platform::CPUDeviceContext, T, SumFunctor>(
ReduceKernelFunctor<phi::CPUContext, T, SumFunctor>(
&gamma_samples, &gamma_sum, {new_shape.size() - 1}, true, false, ctx)
.template apply<T>();
ElementwiseComputeEx<DivFunctor<T>, platform::CPUDeviceContext, T, T>(
ElementwiseComputeEx<DivFunctor<T>, phi::CPUContext, T, T>(
ctx, &gamma_samples, &gamma_sum, -1, DivFunctor<T>(), out);
}
};
......@@ -125,7 +124,5 @@ REGISTER_OP_WITHOUT_GRADIENT(dirichlet,
paddle::operators::DirichletOpMaker);
REGISTER_OP_CPU_KERNEL(
dirichlet,
paddle::operators::DirichletKernel<paddle::platform::CPUDeviceContext,
float>,
paddle::operators::DirichletKernel<paddle::platform::CPUDeviceContext,
double>);
paddle::operators::DirichletKernel<phi::CPUContext, float>,
paddle::operators::DirichletKernel<phi::CPUContext, double>);
......@@ -91,7 +91,7 @@ void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
TEST(Dropout, CPUDense) {
f::Scope scope;
p::CPUPlace place;
p::CPUDeviceContext ctx(place);
phi::CPUContext ctx(place);
Compare(scope, ctx);
}
......
......@@ -164,19 +164,15 @@ REGISTER_OPERATOR(eig,
REGISTER_OPERATOR(eig_grad, ops::EigGradOp);
REGISTER_OP_CPU_KERNEL(
eig,
ops::EigKernel<paddle::platform::CPUDeviceContext, float, complex64>,
ops::EigKernel<paddle::platform::CPUDeviceContext, double, complex128>,
ops::EigKernel<paddle::platform::CPUDeviceContext, complex64, complex64>,
ops::EigKernel<paddle::platform::CPUDeviceContext, complex128, complex128>);
REGISTER_OP_CPU_KERNEL(eig,
ops::EigKernel<phi::CPUContext, float, complex64>,
ops::EigKernel<phi::CPUContext, double, complex128>,
ops::EigKernel<phi::CPUContext, complex64, complex64>,
ops::EigKernel<phi::CPUContext, complex128, complex128>);
REGISTER_OP_CPU_KERNEL(
eig_grad,
ops::EigGradKernel<paddle::platform::CPUDeviceContext, float, complex64>,
ops::EigGradKernel<paddle::platform::CPUDeviceContext, double, complex128>,
ops::
EigGradKernel<paddle::platform::CPUDeviceContext, complex64, complex64>,
ops::EigGradKernel<paddle::platform::CPUDeviceContext,
complex128,
complex128>);
ops::EigGradKernel<phi::CPUContext, float, complex64>,
ops::EigGradKernel<phi::CPUContext, double, complex128>,
ops::EigGradKernel<phi::CPUContext, complex64, complex64>,
ops::EigGradKernel<phi::CPUContext, complex128, complex128>);
......@@ -70,7 +70,7 @@ void TransposeTwoAxis(const Tensor& input,
permute[axis2] = axis1;
transposed_input->mutable_data<T>(input.dims(), context.GetPlace());
auto& dev_ctx = context.template device_context<platform::CPUDeviceContext>();
auto& dev_ctx = context.template device_context<phi::CPUContext>();
TransCompute<DeviceContext, T>(
input.dims().size(), dev_ctx, input, transposed_input, permute);
......
......@@ -86,10 +86,9 @@ REGISTER_OPERATOR(eigvals,
ops::EigvalsOp,
ops::EigvalsOpMaker,
ops::EigvalsOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(eigvals,
ops::EigvalsKernel<plat::CPUDeviceContext, float>,
ops::EigvalsKernel<plat::CPUDeviceContext, double>,
ops::EigvalsKernel<plat::CPUDeviceContext,
paddle::platform::complex<float>>,
ops::EigvalsKernel<plat::CPUDeviceContext,
paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL(
eigvals,
ops::EigvalsKernel<phi::CPUContext, float>,
ops::EigvalsKernel<phi::CPUContext, double>,
ops::EigvalsKernel<phi::CPUContext, paddle::platform::complex<float>>,
ops::EigvalsKernel<phi::CPUContext, paddle::platform::complex<double>>);
......@@ -151,24 +151,23 @@ REGISTER_OPERATOR(eigvalsh,
ops::EigvalshGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(eigvalsh_grad, ops::EigvalshGradOp);
REGISTER_OP_CPU_KERNEL(
eigvalsh,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, float, float>,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext, double, double>,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext,
float,
paddle::platform::complex<float>>,
ops::EigvalshKernel<paddle::platform::CPUDeviceContext,
double,
paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL(eigvalsh,
ops::EigvalshKernel<phi::CPUContext, float, float>,
ops::EigvalshKernel<phi::CPUContext, double, double>,
ops::EigvalshKernel<phi::CPUContext,
float,
paddle::platform::complex<float>>,
ops::EigvalshKernel<phi::CPUContext,
double,
paddle::platform::complex<double>>);
REGISTER_OP_CPU_KERNEL(
eigvalsh_grad,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, float, float>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext, double, double>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext,
ops::EigvalshGradKernel<phi::CPUContext, float, float>,
ops::EigvalshGradKernel<phi::CPUContext, double, double>,
ops::EigvalshGradKernel<phi::CPUContext,
float,
paddle::platform::complex<float>>,
ops::EigvalshGradKernel<paddle::platform::CPUDeviceContext,
ops::EigvalshGradKernel<phi::CPUContext,
double,
paddle::platform::complex<double>>);
......@@ -146,19 +146,17 @@ REGISTER_OPERATOR(expand_as,
REGISTER_OPERATOR(expand_as_grad,
ops::ExpandAsGradOp,
ops::ExpandAsGradNoNeedBufVarsInferer);
REGISTER_OP_CPU_KERNEL(
expand_as,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, bool>);
REGISTER_OP_CPU_KERNEL(
expand_as_grad,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(expand_as,
ops::ExpandAsKernel<phi::CPUContext, float>,
ops::ExpandAsKernel<phi::CPUContext, double>,
ops::ExpandAsKernel<phi::CPUContext, int>,
ops::ExpandAsKernel<phi::CPUContext, int64_t>,
ops::ExpandAsKernel<phi::CPUContext, bool>);
REGISTER_OP_CPU_KERNEL(expand_as_grad,
ops::ExpandAsGradKernel<phi::CPUContext, int>,
ops::ExpandAsGradKernel<phi::CPUContext, int64_t>,
ops::ExpandAsGradKernel<phi::CPUContext, float>,
ops::ExpandAsGradKernel<phi::CPUContext, double>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL(
expand_as,
......
......@@ -280,19 +280,17 @@ REGISTER_OPERATOR(expand_grad,
ops::ExpandDoubleGradOpMaker<paddle::framework::OpDesc>,
ops::ExpandDoubleGradOpMaker<paddle::imperative::OpBase>,
ops::ExpandGradNoNeedBufVarsInferer);
REGISTER_OP_CPU_KERNEL(
expand,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandKernel<paddle::platform::CPUDeviceContext, bool>);
REGISTER_OP_CPU_KERNEL(
expand_grad,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(expand,
ops::ExpandKernel<phi::CPUContext, float>,
ops::ExpandKernel<phi::CPUContext, double>,
ops::ExpandKernel<phi::CPUContext, int>,
ops::ExpandKernel<phi::CPUContext, int64_t>,
ops::ExpandKernel<phi::CPUContext, bool>);
REGISTER_OP_CPU_KERNEL(expand_grad,
ops::ExpandGradKernel<phi::CPUContext, float>,
ops::ExpandGradKernel<phi::CPUContext, double>,
ops::ExpandGradKernel<phi::CPUContext, int>,
ops::ExpandGradKernel<phi::CPUContext, int64_t>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL(
expand,
......
......@@ -62,8 +62,7 @@ class ExponentialOpInferVarType
};
template <typename T>
class ExponentialKernel<platform::CPUDeviceContext, T>
: public framework::OpKernel<T> {
class ExponentialKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto *out = ctx.Output<framework::Tensor>("Out");
......@@ -135,9 +134,8 @@ REGISTER_OPERATOR(exponential_grad,
ExponentialGradInferer);
REGISTER_OP_CPU_KERNEL(exponential,
ops::ExponentialKernel<plat::CPUDeviceContext, float>,
ops::ExponentialKernel<plat::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
exponential_grad,
ops::ExponentialGradKernel<plat::CPUDeviceContext, float>,
ops::ExponentialGradKernel<plat::CPUDeviceContext, double>);
ops::ExponentialKernel<phi::CPUContext, float>,
ops::ExponentialKernel<phi::CPUContext, double>);
REGISTER_OP_CPU_KERNEL(exponential_grad,
ops::ExponentialGradKernel<phi::CPUContext, float>,
ops::ExponentialGradKernel<phi::CPUContext, double>);
......@@ -23,8 +23,8 @@ namespace paddle {
namespace operators {
template <typename T>
struct DequantizeFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx,
struct DequantizeFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in,
const framework::Tensor* scale,
T max_range,
......@@ -39,8 +39,8 @@ struct DequantizeFunctor<platform::CPUDeviceContext, T> {
};
template <typename T>
struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& dev_ctx,
struct ChannelDequantizeFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& dev_ctx,
const framework::Tensor* in,
const framework::Tensor** scales,
const int scale_num,
......@@ -139,10 +139,10 @@ struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> {
}
};
template struct DequantizeFunctor<platform::CPUDeviceContext, float>;
template struct DequantizeFunctor<platform::CPUDeviceContext, double>;
template struct ChannelDequantizeFunctor<platform::CPUDeviceContext, float>;
template struct ChannelDequantizeFunctor<platform::CPUDeviceContext, double>;
template struct DequantizeFunctor<phi::CPUContext, float>;
template struct DequantizeFunctor<phi::CPUContext, double>;
template struct ChannelDequantizeFunctor<phi::CPUContext, float>;
template struct ChannelDequantizeFunctor<phi::CPUContext, double>;
class FakeDequantizeMaxAbsOp : public framework::OperatorWithKernel {
public:
......@@ -269,7 +269,7 @@ Notes: In general, the per-channel quantization is only applied to weights and t
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
fake_dequantize_max_abs,
......
......@@ -32,8 +32,8 @@ struct Compare {
};
template <typename T>
struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct FindAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const T *in,
const int num,
T *out) {
......@@ -41,11 +41,11 @@ struct FindAbsMaxFunctor<platform::CPUDeviceContext, T> {
}
};
template struct FindAbsMaxFunctor<platform::CPUDeviceContext, float>;
template struct FindAbsMaxFunctor<phi::CPUContext, float>;
template <typename T>
struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct FindChannelAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in_tensor,
const int quant_axis,
T *out_abs_max) {
......@@ -86,11 +86,11 @@ struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, T> {
}
};
template struct FindChannelAbsMaxFunctor<platform::CPUDeviceContext, float>;
template struct FindChannelAbsMaxFunctor<phi::CPUContext, float>;
template <typename T>
struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct ClipAndFakeQuantFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in,
const framework::Tensor &scale,
const int bin_cnt,
......@@ -98,7 +98,7 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
framework::Tensor *out) {
T s = scale.data<T>()[0];
T inv_s = inverse(s);
platform::Transform<platform::CPUDeviceContext> trans;
platform::Transform<phi::CPUContext> trans;
if (round_type == 0) {
trans(ctx,
in.data<T>(),
......@@ -117,11 +117,11 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
}
};
template struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, float>;
template struct ClipAndFakeQuantFunctor<phi::CPUContext, float>;
template <typename T>
struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct ClipAndFakeQuantDequantFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in,
const framework::Tensor &scale,
const int bin_cnt,
......@@ -130,7 +130,7 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
T s = scale.data<T>()[0];
T inv_s = inverse(s);
platform::Transform<platform::CPUDeviceContext> trans;
platform::Transform<phi::CPUContext> trans;
if (round_type == 0) {
trans(ctx,
in.data<T>(),
......@@ -151,12 +151,11 @@ struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
}
}
};
template struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext,
float>;
template struct ClipAndFakeQuantDequantFunctor<phi::CPUContext, float>;
template <typename T>
struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct ChannelClipAndFakeQuantFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in,
const framework::Tensor &scale,
const int bin_cnt,
......@@ -176,7 +175,7 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
auto *out_data = out->mutable_data<T>(ctx.GetPlace());
auto in_dims = in.dims();
const int64_t channel = in_dims[quant_axis];
platform::Transform<platform::CPUDeviceContext> trans;
platform::Transform<phi::CPUContext> trans;
if (quant_axis == 0) {
const int64_t channel_size = in.numel() / channel;
for (int64_t i = 0; i < channel; i++) {
......@@ -235,11 +234,10 @@ struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
}
};
template struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext,
float>;
template struct ChannelClipAndFakeQuantFunctor<phi::CPUContext, float>;
template <typename T>
struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct ChannelClipFakeQuantDequantFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in,
const framework::Tensor &scale,
const int bin_cnt,
......@@ -258,7 +256,7 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
auto *out_data = out->mutable_data<T>(ctx.GetPlace());
auto in_dims = in.dims();
const int64_t channel = in_dims[quant_axis];
platform::Transform<platform::CPUDeviceContext> trans;
platform::Transform<phi::CPUContext> trans;
if (quant_axis == 0) {
const int64_t channel_size = in.numel() / channel;
for (int i = 0; i < channel; i++) {
......@@ -326,11 +324,10 @@ struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
}
};
template struct ChannelClipFakeQuantDequantFunctor<platform::CPUDeviceContext,
float>;
template struct ChannelClipFakeQuantDequantFunctor<phi::CPUContext, float>;
template <typename T>
struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct FindRangeAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &cur_scale,
const framework::Tensor &last_scale,
const framework::Tensor &iter,
......@@ -349,18 +346,17 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
max = cur;
} else if (fabs(removed - max) < 1e-6) {
int size = (it > window_size) ? window_size : it;
FindAbsMaxFunctor<platform::CPUDeviceContext, T>()(
ctx, scale_arr, size, &max);
FindAbsMaxFunctor<phi::CPUContext, T>()(ctx, scale_arr, size, &max);
}
out_scale->mutable_data<T>(ctx.GetPlace())[0] = max;
}
};
template struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, float>;
template struct FindRangeAbsMaxFunctor<phi::CPUContext, float>;
template <typename T>
struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext &ctx,
struct FindMovingAverageAbsMaxFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext &ctx,
const framework::Tensor &in_accum,
const framework::Tensor &in_state,
const T *cur_scale,
......@@ -382,8 +378,7 @@ struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
}
};
template struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext,
float>;
template struct FindMovingAverageAbsMaxFunctor<phi::CPUContext, float>;
class FakeQuantOrWithDequantAbsMaxOp : public framework::OperatorWithKernel {
public:
......@@ -968,7 +963,7 @@ class StrightThroughEstimatorMaker : public framework::SingleGradOpMaker<T> {
} // namespace paddle
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
using CPU = phi::CPUContext;
REGISTER_OPERATOR(
fake_quantize_abs_max,
......
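Beyond the kernel registrations, the quantization files above also re-specialize their CPU functors and their explicit instantiations on the new context type. A minimal sketch of that side of the pattern, assuming the usual fluid framework headers; ExampleScaleFunctor is a hypothetical name, not a symbol touched by this PR:
template <typename DeviceContext, typename T>
struct ExampleScaleFunctor;
// CPU specialization now takes phi::CPUContext instead of
// platform::CPUDeviceContext.
template <typename T>
struct ExampleScaleFunctor<phi::CPUContext, T> {
  void operator()(const phi::CPUContext& dev_ctx,
                  const framework::Tensor& in,
                  T scale,
                  framework::Tensor* out) {
    auto in_e = framework::EigenVector<T>::Flatten(in);
    auto out_e = framework::EigenVector<T>::Flatten(*out);
    out_e.device(*dev_ctx.eigen_device()) = in_e * scale;
  }
};
// Explicit instantiations name phi::CPUContext as well.
template struct ExampleScaleFunctor<phi::CPUContext, float>;
template struct ExampleScaleFunctor<phi::CPUContext, double>;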
......@@ -223,7 +223,6 @@ REGISTER_OPERATOR(
ops::FCOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
fc,
ops::FCOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::FCOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(fc,
ops::FCOpKernel<phi::CPUContext, float>,
ops::FCOpKernel<phi::CPUContext, double>);
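For reference, every hunk shown above applies the same mechanical substitution: platform::CPUDeviceContext (including the plat:: and p:: aliases) becomes phi::CPUContext in device_context<>() calls, functor and kernel template arguments, and REGISTER_OP_CPU_KERNEL lists, with the registration calls re-wrapped accordingly. A minimal end-to-end sketch of a migrated CPU kernel, assuming the usual op_registry and math_function headers; ExampleKernel and example_op are hypothetical names used only to illustrate the pattern:
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class ExampleKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // CPU kernels now request phi::CPUContext from the execution context.
    auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
    auto* out = ctx.Output<framework::Tensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    phi::funcs::SetConstant<phi::CPUContext, T> set_zero;
    set_zero(dev_ctx, out, static_cast<T>(0));
  }
};
}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CPU_KERNEL(example_op,
                       ops::ExampleKernel<phi::CPUContext, float>,
                       ops::ExampleKernel<phi::CPUContext, double>);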
(The diffs of the remaining 29 files are collapsed and not shown.)