diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index e3fc21c90f95469d646139a4454501d1c30bd51c..650bc92be22af9ea8afcacf590a11190109e8811 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -70,16 +70,16 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const { framework::OpKernelType ConvOp::GetExpectedKernelType( const framework::ExecutionContext& ctx) const { - framework::LibraryType library_{framework::LibraryType::kPlain}; + framework::LibraryType library{framework::LibraryType::kPlain}; #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kCUDNN; + library = framework::LibraryType::kCUDNN; } #endif #ifdef PADDLE_WITH_MKLDNN - if (library_ == framework::LibraryType::kPlain && + if (library == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { - library_ = framework::LibraryType::kMKLDNN; + library = framework::LibraryType::kMKLDNN; } #endif @@ -91,15 +91,15 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( "input and filter data type should be consistent"); if (input_data_type == framework::proto::VarType::FP16) { - PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN, + PADDLE_ENFORCE_EQ(library, framework::LibraryType::kCUDNN, "float16 can only be used when CUDNN is used"); } std::string data_format = ctx.Attr("data_format"); // TODO(pzelazko-intel): enable MKLDNN layout when it's ready - framework::DataLayout layout_ = framework::StringToDataLayout(data_format); - return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, - library_); + framework::DataLayout layout = framework::StringToDataLayout(data_format); + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, + library); } Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker) diff --git a/paddle/fluid/operators/feed_op.cc b/paddle/fluid/operators/feed_op.cc index 90c31877f6a87d1e237283d489353b4aba26c97b..debacf07c360b9aa69000a0d891f04239ed08807 100644 --- a/paddle/fluid/operators/feed_op.cc +++ b/paddle/fluid/operators/feed_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { @@ -28,6 +29,10 @@ class FeedOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { + // get device context from pool + auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place); + platform::RecordEvent record_event(Type(), dev_ctx); + auto feed_var_name = Input("X"); auto *feed_var = scope.FindVar(feed_var_name); @@ -50,14 +55,10 @@ class FeedOp : public framework::OperatorBase { auto &feed_item = feed_list.at(static_cast(col)); auto *out_item = out_var->GetMutable(); - // get device context from pool - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - if (platform::is_same_place(feed_item.place(), place)) { out_item->ShareDataWith(feed_item); } else { - framework::TensorCopy(feed_item, place, dev_ctx, out_item); + framework::TensorCopy(feed_item, place, *dev_ctx, out_item); } out_item->set_lod(feed_item.lod()); } diff --git a/paddle/fluid/operators/fetch_op.cc b/paddle/fluid/operators/fetch_op.cc index d66f01d1b7ce8528a7c0177b2889aff7e0c5a12b..7c7f3e9059fbb1e3f2cca4f04edfff55c9452761 100644 --- a/paddle/fluid/operators/fetch_op.cc +++ b/paddle/fluid/operators/fetch_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { @@ -29,6 +30,9 @@ class FetchOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + platform::RecordEvent record_event(Type(), pool.Get(place)); + auto fetch_var_name = Input("X"); auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE(fetch_var != nullptr, @@ -53,7 +57,6 @@ class FetchOp : public framework::OperatorBase { // FIXME(yuyang18): Should we assume the fetch operator always generate // CPU outputs? - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(src_item.place()); TensorCopy(src_item, platform::CPUPlace(), dev_ctx, &dst_item); diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index 05f809ac5628420251957116bb2390b4502f11b8..6ffe0bec5e38432676ecadfa1abbbe70a1425bb1 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { @@ -29,6 +30,9 @@ class LoadOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { + auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place); + platform::RecordEvent record_event(Type(), dev_ctx); + auto filename = Attr("file_path"); std::ifstream fin(filename); PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s for load op", @@ -41,9 +45,7 @@ class LoadOp : public framework::OperatorBase { auto *tensor = out_var->GetMutable(); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto &dev_ctx = *pool.Get(place); - DeserializeFromStream(fin, tensor, dev_ctx); + DeserializeFromStream(fin, tensor, *dev_ctx); if (platform::is_gpu_place(place)) { // copy CPU to GPU @@ -55,7 +57,7 @@ class LoadOp : public framework::OperatorBase { out_var->Clear(); tensor = out_var->GetMutable(); tensor->set_lod(cpu_tensor.lod()); - TensorCopy(cpu_tensor, place, dev_ctx, tensor); + TensorCopy(cpu_tensor, place, *dev_ctx, tensor); } } };