From 0f353ab46ed75ab99213f5ee38fcd1867838ef5f Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Mon, 8 Jan 2018 13:10:44 +0800 Subject: [PATCH] cpu gpu transform function (#7191) * add rename guard * add device_data_transform * add device_data_transform_test * modify GetExpectedKernelType * update operator.run * support test test_label_semantic_roles * optimize code * optimize code * rename GetActualKernelType to GetExpectedKernelType * fix chunk_eval_op and device_data_transform_test * add is_same_place to place * optimize code, refine rename_guard * refine rename guard, add GetKernelTypeForVar * optimize code * add some log * rename guard * use sub scope to create var * fix compile * add IsInitialized for Tensor * add VarIsTensor * fix op_registry_test * test * tmp disable priority * restore switch_kernel.md * code clean --- paddle/framework/CMakeLists.txt | 7 +- paddle/framework/data_transform.cc | 33 ++++ paddle/framework/data_transform.h | 8 + paddle/framework/device_data_transform.cc | 46 +++++ paddle/framework/device_data_transform.h | 27 +++ .../framework/device_data_transform_test.cu | 168 ++++++++++++++++++ paddle/framework/op_registry_test.cc | 22 +-- paddle/framework/operator.cc | 151 +++++----------- paddle/framework/operator.h | 7 +- paddle/framework/operator_test.cc | 3 +- paddle/framework/scope.cc | 1 + paddle/framework/scope.h | 2 +- paddle/framework/tensor.h | 2 + paddle/framework/tensor_impl.h | 2 + paddle/framework/variable.h | 2 + paddle/operators/accuracy_op.cc | 2 +- paddle/operators/auc_op.cc | 2 +- paddle/operators/batch_norm_op.cc | 2 +- paddle/operators/chunk_eval_op.cc | 4 +- paddle/operators/chunk_eval_op.h | 13 +- paddle/operators/compare_op.cc | 4 +- paddle/operators/conv_op.h | 13 -- paddle/operators/crf_decoding_op.cc | 10 +- paddle/operators/crf_decoding_op.h | 3 - paddle/operators/cross_entropy_op.cc | 4 +- .../fill_constant_batch_size_like_op.cc | 2 +- paddle/operators/gather_op.cc | 4 +- paddle/operators/gaussian_random_op.cc | 2 +- paddle/operators/linear_chain_crf_op.cc | 4 +- paddle/operators/lod_reset_op.cc | 4 +- paddle/operators/logical_op.cc | 4 +- paddle/operators/lookup_table_op.cc | 4 +- paddle/operators/lstm_op.cc | 4 +- paddle/operators/multiplex_op.cc | 4 +- paddle/operators/nce_op.cc | 4 +- paddle/operators/pool_with_index_op.cc | 4 +- paddle/operators/positive_negative_pair_op.cc | 2 +- paddle/operators/precision_recall_op.cc | 2 +- paddle/operators/roi_pool_op.cc | 4 +- paddle/operators/scatter_op.cc | 4 +- paddle/operators/sequence_pool_op.cc | 2 +- paddle/operators/sequence_slice_op.cc | 4 +- .../softmax_with_cross_entropy_op.cc | 4 +- paddle/operators/sum_op.cc | 2 +- paddle/operators/uniform_random_op.cc | 2 +- paddle/operators/unpool_op.cc | 4 +- paddle/platform/place.cc | 12 ++ paddle/platform/place.h | 1 + .../tests/book/test_label_semantic_roles.py | 8 +- 49 files changed, 429 insertions(+), 200 deletions(-) create mode 100644 paddle/framework/device_data_transform.cc create mode 100644 paddle/framework/device_data_transform.h create mode 100644 paddle/framework/device_data_transform_test.cu diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 3967a4013..46439c2d2 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -32,7 +32,9 @@ cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) cc_library(scope SRCS scope.cc DEPS glog threadpool) cc_test(scope_test SRCS scope_test.cc DEPS scope) -cc_library(data_transform SRCS data_transform.cc DEPS math_function 
tensor framework_proto) +cc_library(device_data_transform SRCS device_data_transform.cc DEPS tensor) + +cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor framework_proto selected_rows device_data_transform) cc_test(data_transform_test SRCS data_transform_test.cc DEPS data_transform device_context) cc_library(attribute SRCS attribute.cc DEPS framework_proto) @@ -77,3 +79,6 @@ cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece operat cc_test(init_test SRCS init_test.cc DEPS init) cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) + +nv_test(device_data_transform_test SRCS device_data_transform_test.cu + DEPS operator op_registry init math_function) diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc index 6b1780968..55825c5b7 100644 --- a/paddle/framework/data_transform.cc +++ b/paddle/framework/data_transform.cc @@ -14,7 +14,9 @@ limitations under the License. */ #include #include "paddle/framework/data_transform.h" +#include "paddle/framework/device_data_transform.h" #include "paddle/framework/lod_tensor.h" +#include "paddle/framework/selected_rows.h" #include "paddle/platform/device_context.h" namespace paddle { @@ -25,6 +27,37 @@ DataTransformFnMap& DataTransformFnMap::Instance() { return data_transform_map; } +Tensor* DataTransform(const OpKernelType& expected_kernel_type, + const OpKernelType& kernel_type_for_var, + const Tensor& input_tensor) { + Tensor* out = nullptr; + if (!platform::is_same_place(kernel_type_for_var.place_, + expected_kernel_type.place_)) { + out = DeviceTransform(input_tensor, expected_kernel_type.place_); + } + PADDLE_ENFORCE_NOT_NULL(out, "out should not be null"); + return out; +} + +void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, + Variable& out_var) { + if (in_var.IsType()) { + auto& in_lod_tensor = in_var.Get(); + auto* tran_lod_tensor = out_var.GetMutable(); + tran_lod_tensor->set_lod(in_lod_tensor.lod()); + tran_lod_tensor->set_layout(in_lod_tensor.layout()); + tran_lod_tensor->ShareDataWith(tensor); + } else if (in_var.IsType()) { + auto& in_selected_rows = in_var.Get(); + auto* trans_selected_rows = out_var.GetMutable(); + trans_selected_rows->set_height(in_selected_rows.height()); + trans_selected_rows->set_rows(in_selected_rows.rows()); + trans_selected_rows->mutable_value()->ShareDataWith(tensor); + } else { + PADDLE_THROW("unknown var type"); + } +} + auto KernelFP32 = OpKernelType(proto::DataType::FP32, platform::CPUPlace(), DataLayout::kNHWC, LibraryType::kPlain); diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h index 56ebc80f4..42fc5f4d7 100644 --- a/paddle/framework/data_transform.h +++ b/paddle/framework/data_transform.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include "paddle/framework/op_kernel_type.h" +#include "paddle/framework/selected_rows.h" #include "paddle/framework/tensor.h" #include "paddle/framework/variable.h" #include "paddle/operators/math/math_function.h" @@ -49,6 +50,13 @@ struct KernelTypePairHash { } }; +Tensor* DataTransform(const OpKernelType& expected_kernel_type, + const OpKernelType& kernel_type_for_var, + const Tensor& input_tensor); + +void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, + Variable& out_var); + template struct CastDataTypeFunctor { HOSTDEVICE inline OutType operator()(InType in) const { diff --git a/paddle/framework/device_data_transform.cc b/paddle/framework/device_data_transform.cc new file mode 100644 index 000000000..4f9b7e96a --- /dev/null +++ b/paddle/framework/device_data_transform.cc @@ -0,0 +1,46 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/framework/device_data_transform.h" + +namespace paddle { +namespace framework { + +static const platform::DeviceContext* GetDeviceContext( + const platform::Place& src_place, const platform::Place& dst_place) { + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + + if (platform::is_gpu_place(src_place) && platform::is_cpu_place(dst_place)) { + return pool.Get(src_place); + } else if (platform::is_cpu_place(src_place) && + platform::is_gpu_place(dst_place)) { + return pool.Get(dst_place); + } else { + PADDLE_THROW( + "Currently, model parallelism is only supported between CPU and CUDA"); + } +} + +Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) { + VLOG(3) << "DeviceTransform in, src_place " << in.place() + << " dst_place: " << dst_place; + Tensor* out = new Tensor(); + auto* dev_ctx = GetDeviceContext(in.place(), dst_place); + dev_ctx->Wait(); + CopyFrom(in, dst_place, *dev_ctx, out); + dev_ctx->Wait(); + return out; +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/device_data_transform.h b/paddle/framework/device_data_transform.h new file mode 100644 index 000000000..bebf0d1b3 --- /dev/null +++ b/paddle/framework/device_data_transform.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/tensor.h" +#include "paddle/framework/tensor_util.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace framework { + +Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place); + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/device_data_transform_test.cu b/paddle/framework/device_data_transform_test.cu new file mode 100644 index 000000000..e9100053d --- /dev/null +++ b/paddle/framework/device_data_transform_test.cu @@ -0,0 +1,168 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "gtest/gtest.h" + +#include "paddle/framework/init.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_info.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/elementwise_op_function.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace framework { + +template +struct AddFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return a + b; } +}; + +class OpKernelTestProtoAndCheckerMaker : public OpProtoAndCheckerMaker { + public: + OpKernelTestProtoAndCheckerMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("input", "input1 of test op"); + AddOutput("output", "output of test op"); + AddAttr("use_gpu", "force to use gpu kernel").SetDefault(false); + AddComment("This is test op"); + } +}; + +class TestOpWithKernel : public OperatorWithKernel { + public: + using OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext* ctx) const override {} + OpKernelType GetExpectedKernelType( + const ExecutionContext& ctx) const override { + if (Attr("use_gpu")) { + VLOG(3) << "force use gpu kernel"; + return OpKernelType(proto::DataType::FP32, platform::CUDAPlace(0)); + } else { + VLOG(3) << "use default kernel"; + return OpKernelType(proto::DataType::FP32, + ctx.Input("input")->place()); + } + } +}; + +template +class TestKernel : public OpKernel { + public: + void Compute(const ExecutionContext& ctx) const { + std::cout << ctx.op().DebugString() << std::endl; + + const Tensor* input = ctx.Input("input"); + + std::cout << "input place:" << input->place() << std::endl; + auto* output = ctx.Output("output"); + output->Resize(input->dims()); + output->mutable_data(ctx.GetPlace()); + + operators::TransformFunctor, T, DeviceContext> functor( + input, input, output, ctx.template device_context(), + AddFunctor()); + functor.Run(); + } +}; + +} // namespace framework +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT( + test_op, paddle::framework::TestOpWithKernel, + paddle::framework::OpKernelTestProtoAndCheckerMaker); +REGISTER_OP_CPU_KERNEL( + test_op, + paddle::framework::TestKernel); +REGISTER_OP_CUDA_KERNEL( + test_op, + paddle::framework::TestKernel); + +static void 
BuildVar(const std::string& param_name, + std::initializer_list arguments, + paddle::framework::proto::OpDesc::Var* var) { + var->set_parameter(param_name); + for (auto& arg_name : arguments) { + *var->mutable_arguments()->Add() = arg_name; + } +} + +TEST(Operator, CPUtoGPU) { + using namespace paddle::framework; + using namespace paddle::platform; + + ASSERT_EQ(InitDevices({"CPU", "GPU:0"}), true); + + paddle::framework::Scope scope; + paddle::platform::CPUPlace cpu_place; + + // create an op to run on CPU + paddle::framework::proto::OpDesc cpu_op_desc; + cpu_op_desc.set_type("test_op"); + BuildVar("input", {"IN1"}, cpu_op_desc.add_inputs()); + BuildVar("output", {"OUT1"}, cpu_op_desc.add_outputs()); + + auto cpu_op = paddle::framework::OpRegistry::CreateOp(cpu_op_desc); + // prepare input + auto* in_t = scope.Var("IN1")->GetMutable(); + auto* src_ptr = in_t->mutable_data({2, 3}, CPUPlace()); + for (int i = 0; i < 2 * 3; ++i) { + src_ptr[i] = static_cast(i); + } + + // get output + auto* output = scope.Var("OUT1"); + cpu_op->Run(scope, cpu_place); + + auto* output_ptr = output->Get().data(); + for (int i = 0; i < 2 * 3; ++i) { + ASSERT_EQ(output_ptr[i], static_cast(i) * 2); + } + + // create an op to run on GPU + paddle::framework::proto::OpDesc gpu_op_desc; + gpu_op_desc.set_type("test_op"); + BuildVar("input", {"OUT1"}, gpu_op_desc.add_inputs()); + BuildVar("output", {"OUT2"}, gpu_op_desc.add_outputs()); + + auto attr = gpu_op_desc.mutable_attrs()->Add(); + attr->set_name("use_gpu"); + attr->set_type(paddle::framework::proto::AttrType::BOOLEAN); + attr->set_b(true); + + auto gpu_op = paddle::framework::OpRegistry::CreateOp(gpu_op_desc); + + paddle::platform::CUDAPlace cuda_place(0); + // get output + auto* output2 = scope.Var("OUT2"); + gpu_op->Run(scope, cuda_place); + + // auto* output2_ptr = output2->Get().data(); + DeviceContextPool& pool = DeviceContextPool::Instance(); + auto dev_ctx = pool.Get(cuda_place); + + paddle::framework::Tensor output_tensor; + CopyFrom(output2->Get(), paddle::platform::CPUPlace(), *dev_ctx, + &output_tensor); + + dev_ctx->Wait(); + float* output2_ptr = output_tensor.data(); + for (int i = 0; i < 2 * 3; ++i) { + ASSERT_EQ(output2_ptr[i], static_cast(i) * 4); + } +} diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc index a286925bb..f7a10ada8 100644 --- a/paddle/framework/op_registry_test.cc +++ b/paddle/framework/op_registry_test.cc @@ -218,7 +218,7 @@ class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(InferShapeContext* ctx) const override {} - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType(proto::DataType::FP32, ctx.device_context()); } @@ -282,16 +282,11 @@ class OpWithMultiKernelTest : public OperatorWithKernel { protected: void InferShape(InferShapeContext* ctx) const override {} - framework::OpKernelType GetActualKernelType( - const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(proto::DataType::FP32, ctx.device_context()); - } - framework::OpKernelType GetExpectedKernelType( - const framework::OpKernelType& kernel) const override { - return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), - kernel.data_layout_, - framework::LibraryType::kCUDNN); + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + proto::DataType::FP32, platform::CUDAPlace(0), 
DataLayout::kAnyLayout, + framework::LibraryType::kCUDNN); } }; @@ -371,6 +366,7 @@ TEST(OperatorRegistrar, OpWithMultiKernel) { op_desc.set_type("op_with_multi_kernel"); auto op = paddle::framework::OpRegistry::CreateOp(op_desc); + // TODO(qiao) add priority back // use all available kernels paddle::framework::UseALL(); op->Run(scope, cuda_place); @@ -380,16 +376,16 @@ TEST(OperatorRegistrar, OpWithMultiKernel) { paddle::framework::UseCPU(); op->Run(scope, cpu_place); - EXPECT_EQ(op_test_value, -9); + EXPECT_EQ(op_test_value, -20); // add cuda kernels paddle::framework::UseCUDA(); op->Run(scope, cuda_place); - EXPECT_EQ(op_test_value, -10); + EXPECT_EQ(op_test_value, -30); // use cudnn kernel paddle::framework::UseCUDNN(); op->Run(scope, cuda_place); - EXPECT_EQ(op_test_value, -20); + EXPECT_EQ(op_test_value, -40); } diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 4ef0c2523..adc85b104 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -14,11 +14,10 @@ limitations under the License. */ #include #include -#include #include "paddle/framework/data_transform.h" +#include "paddle/framework/device_data_transform.h" #include "paddle/framework/executor.h" -#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/operator.h" #include "paddle/framework/shape_inference.h" #include "paddle/framework/var_type.h" @@ -243,6 +242,10 @@ void OperatorBase::GenerateTemporaryNames() { } } +static bool VarIsTensor(const Variable* var) { + return var->IsType() || var->IsType(); +} + static const Tensor* GetTensorFromVar(const Variable* var) { const Tensor* t = nullptr; if (var->IsType()) { @@ -453,30 +456,6 @@ class RuntimeInferShapeContext : public InferShapeContext { const Scope& scope_; }; -const platform::DeviceContext* GetDeviceContext( - framework::KernelTypePair& kernel_pair) { - auto& actual_kernel_key = kernel_pair.first; - auto& expected_kernel_key = kernel_pair.second; - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - - if (platform::is_gpu_place(actual_kernel_key.place_) && - platform::is_cpu_place(expected_kernel_key.place_)) { - return pool.Get(actual_kernel_key.place_); - } else if (platform::is_cpu_place(actual_kernel_key.place_) && - platform::is_gpu_place(expected_kernel_key.place_)) { - return pool.Get(expected_kernel_key.place_); - } else { - PADDLE_THROW( - "Currently, model parallelism is only supported between CPU and CUDA"); - } -} - -const platform::DeviceContext* GetDeviceContext( - const framework::OpKernelType& kernel) { - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - return pool.Get(kernel.place_); -} - void OperatorWithKernel::Run(const Scope& scope, const platform::Place& place) const { RuntimeInferShapeContext infer_shape_ctx(*this, scope); @@ -492,94 +471,43 @@ void OperatorWithKernel::Run(const Scope& scope, "There are no kernels which are registered in the %s operator.", type_); } - // check if op[type] have kernel for kernel_key - OpKernelMap& kernels = kernels_iter->second; - ExecutionContext ctx(*this, scope, *dev_ctx); - auto actual_kernel_key = GetActualKernelType(ctx); - - auto expected_kernel_key = GetExpectedKernelType(actual_kernel_key); - - if (actual_kernel_key == expected_kernel_key) { - PADDLE_ENFORCE_EQ(actual_kernel_key.place_, expected_kernel_key.place_, - "Currently, model parallelism is only supported between " - "CPU and other devices. 
For example, multi-GPU model " - "parallelism will failed."); - } else { - // find the best key candidate - const DataTransformFnMap& trans_map = DataTransformFnMap::Instance(); - for (auto& candidate : kKernelPriority) { - auto candidate_key = - OpKernelType(actual_kernel_key.data_type_, std::get<0>(candidate), - actual_kernel_key.data_layout_, std::get<1>(candidate)); - - auto candidate_pair = std::make_pair(actual_kernel_key, candidate_key); - if ((actual_kernel_key == candidate_key) || - (kernels.count(candidate_key) && - trans_map.GetNullable(candidate_pair))) { - expected_kernel_key = candidate_key; - break; - } - } - - auto kernel_pair = std::make_pair(actual_kernel_key, expected_kernel_key); - const DataTransformFn* trans_fun = trans_map.GetNullable(kernel_pair); - if (trans_fun) { - auto input_vars = this->InputVars(); - // TODO(qijun) filter the input vars that do not need to be transformed - - // filter vars that has been transformed - std::vector need_trans; - for (auto var_name : input_vars) { - auto var_name_trans = - var_name + framework::KernelTypeToString(expected_kernel_key); - if (!scope.FindVar(var_name_trans)) { - const_cast(scope).Var(var_name_trans); - need_trans.push_back(var_name); - } - } - - if (!need_trans.empty()) { - auto trans_dev_ctx = GetDeviceContext(kernel_pair); - - // Wait for transform starting - dev_ctx->Wait(); - - for (auto var_name : need_trans) { - (*trans_fun)(trans_dev_ctx, kernel_pair, *(scope.FindVar(var_name)), - scope.FindVar(var_name + framework::KernelTypeToString( - expected_kernel_key))); + auto expected_kernel_key = this->GetExpectedKernelType(ctx); + + Scope& new_scope = scope.NewScope(); + + for (auto& var_name_item : this->Inputs()) { + for (auto& var_name : var_name_item.second) { + auto* var = scope.FindVar(var_name); + if (var && VarIsTensor(var)) { + auto* tensor_in = GetTensorFromVar(var); + if (tensor_in->IsInitialized()) { + auto kernel_type_for_var = this->GetKernelTypeForVar( + var_name_item.first, *tensor_in, expected_kernel_key); + if (kernel_type_for_var != expected_kernel_key) { + auto out_var_names = OutputVars(true); + if (std::find(out_var_names.begin(), out_var_names.end(), + var_name) != out_var_names.end()) { + PADDLE_THROW( + "var %s is both input and output, " + "does not support transform", + var_name); + } + VLOG(3) << "need to do transform for var " << var_name; + auto* trans_var = new_scope.Var(var_name); + auto* out = DataTransform(expected_kernel_key, kernel_type_for_var, + *tensor_in); + CopyVariableWithTensor(*var, *out, *trans_var); + } } - // Wait for data transform finishing - trans_dev_ctx->Wait(); } } } - VLOG(10) << "Actual kernel: " << actual_kernel_key - << "Expected kernel: " << expected_kernel_key; - + OpKernelMap& kernels = kernels_iter->second; auto kernel_iter = kernels.find(expected_kernel_key); - if (kernel_iter == kernels.end()) { - PADDLE_THROW("The operator %s does not support %s", type_, - expected_kernel_key); - } - - auto* expected_dev_ctx = GetDeviceContext(expected_kernel_key); - ExecutionContext expected_ctx(*this, scope, *expected_dev_ctx); - - kernel_iter->second->Compute(expected_ctx); -} - -OpKernelType OperatorWithKernel::GetActualKernelType( - const ExecutionContext& ctx) const { - return OpKernelType(IndicateDataType(ctx), ctx.GetPlace()); -} - -OpKernelType OperatorWithKernel::GetExpectedKernelType( - const OpKernelType& actual_kernel_type) const { - return actual_kernel_type; + kernel_iter->second->Compute(ExecutionContext(*this, new_scope, *dev_ctx)); } proto::DataType 
OperatorWithKernel::IndicateDataType( @@ -611,5 +539,16 @@ proto::DataType OperatorWithKernel::IndicateDataType( return static_cast(data_type); } +OpKernelType OperatorWithKernel::GetExpectedKernelType( + const ExecutionContext& ctx) const { + return OpKernelType(IndicateDataType(ctx), ctx.GetPlace()); +} + +OpKernelType OperatorWithKernel::GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const OpKernelType& expected_kernel_type) const { + return OpKernelType(expected_kernel_type.data_type_, tensor.place()); +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 800397c07..d5feb5986 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -408,9 +408,10 @@ class OperatorWithKernel : public OperatorBase { } protected: - virtual OpKernelType GetActualKernelType(const ExecutionContext& ctx) const; - virtual OpKernelType GetExpectedKernelType( - const OpKernelType& actual_kernel_type) const; + virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const; + virtual OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const OpKernelType& expected_kernel_type) const; private: // indicate kernel DataType by input data. Defaultly all input data must be diff --git a/paddle/framework/operator_test.cc b/paddle/framework/operator_test.cc index 4d38a7ada..d002f3f23 100644 --- a/paddle/framework/operator_test.cc +++ b/paddle/framework/operator_test.cc @@ -114,7 +114,8 @@ class OpWithKernelTest : public OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override {} - OpKernelType GetActualKernelType(const ExecutionContext& ctx) const override { + OpKernelType GetExpectedKernelType( + const ExecutionContext& ctx) const override { return OpKernelType(proto::DataType::FP32, ctx.GetPlace()); } }; diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index 4e80e3d97..2bd0ac8f5 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -109,6 +109,7 @@ std::string Scope::Rename(const std::string& origin_name) const { Rename(origin_name, var_name); return var_name; } + Variable* Scope::FindVarLocally(const std::string& name) const { auto it = vars_.find(name); if (it != vars_.end()) return it->second; diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index 10143326d..a1da81cc7 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -75,9 +75,9 @@ class Scope { // Rename variable to a new name and return the new name std::string Rename(const std::string& origin_name) const; - private: Variable* FindVarLocally(const std::string& name) const; + private: // Call Scope::NewScope for a sub-scope. 
explicit Scope(Scope const* parent) : parent_(parent) {} diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 02b125cbb..4aaa29d79 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -55,6 +55,8 @@ class Tensor { template inline const T* data() const; + inline bool IsInitialized() const; + inline void switch_place(platform::Place new_place); /** diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 6c6f298ed..1340c5e48 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -84,6 +84,8 @@ inline const T* Tensor::data() const { reinterpret_cast(holder_->ptr()) + offset_); } +inline bool Tensor::IsInitialized() const { return holder_ != nullptr; } + template inline T* Tensor::data() { check_memory_size(); diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index e5a94759f..36b76fb19 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -32,6 +32,8 @@ class Variable { return *static_cast(holder_->Ptr()); } + bool IsInitialized() const { return holder_ != nullptr; } + template T* GetMutable() { if (!IsType()) { diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index d7baa6e90..8e8a3c7dd 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -53,7 +53,7 @@ class AccuracyOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Out")->type()), diff --git a/paddle/operators/auc_op.cc b/paddle/operators/auc_op.cc index c16bc1193..b6494f950 100644 --- a/paddle/operators/auc_op.cc +++ b/paddle/operators/auc_op.cc @@ -39,7 +39,7 @@ class AucOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Out")->type()), diff --git a/paddle/operators/batch_norm_op.cc b/paddle/operators/batch_norm_op.cc index dd7b038b0..0e984c38b 100644 --- a/paddle/operators/batch_norm_op.cc +++ b/paddle/operators/batch_norm_op.cc @@ -306,7 +306,7 @@ class BatchNormGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { const auto *var = ctx.InputVar(framework::GradVarName("Y")); if (var == nullptr) { diff --git a/paddle/operators/chunk_eval_op.cc b/paddle/operators/chunk_eval_op.cc index a04040426..44f667aea 100644 --- a/paddle/operators/chunk_eval_op.cc +++ b/paddle/operators/chunk_eval_op.cc @@ -55,10 +55,10 @@ class ChunkEvalOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType(framework::proto::DataType::FP32, - ctx.device_context()); + platform::CPUPlace()); } }; diff --git a/paddle/operators/chunk_eval_op.h b/paddle/operators/chunk_eval_op.h index 74ab435c8..300aff90c 100644 --- a/paddle/operators/chunk_eval_op.h +++ b/paddle/operators/chunk_eval_op.h @@ -145,6 +145,7 @@ class ChunkEvalKernel : public framework::OpKernel { 
context.Attr>("excluded_chunk_types").end()); auto* inference = context.Input("Inference"); + auto place = inference->place(); auto* label = context.Input("Label"); auto* precision = context.Output("Precision"); auto* recall = context.Output("Recall"); @@ -155,15 +156,15 @@ class ChunkEvalKernel : public framework::OpKernel { const int64_t* inference_data = inference->data(); const int64_t* label_data = label->data(); - T* precision_data = precision->mutable_data(context.GetPlace()); - T* racall_data = recall->mutable_data(context.GetPlace()); - T* f1_data = f1->mutable_data(context.GetPlace()); + T* precision_data = precision->mutable_data(place); + T* racall_data = recall->mutable_data(place); + T* f1_data = f1->mutable_data(place); int64_t* num_infer_chunks_data = - num_infer_chunks->mutable_data(context.GetPlace()); + num_infer_chunks->mutable_data(place); int64_t* num_label_chunks_data = - num_label_chunks->mutable_data(context.GetPlace()); + num_label_chunks->mutable_data(place); int64_t* num_correct_chunks_data = - num_correct_chunks->mutable_data(context.GetPlace()); + num_correct_chunks->mutable_data(place); *num_infer_chunks_data = 0; *num_label_chunks_data = 0; *num_correct_chunks_data = 0; diff --git a/paddle/operators/compare_op.cc b/paddle/operators/compare_op.cc index 44665b787..daa2c193b 100644 --- a/paddle/operators/compare_op.cc +++ b/paddle/operators/compare_op.cc @@ -66,9 +66,9 @@ class CompareOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - framework::OpKernelType kt = OperatorWithKernel::GetActualKernelType(ctx); + framework::OpKernelType kt = OperatorWithKernel::GetExpectedKernelType(ctx); // CompareOp kernel's device type is decided by input tensor place kt.place_ = ctx.Input("X")->place(); return kt; diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h index fe3c0bc93..83786e232 100644 --- a/paddle/operators/conv_op.h +++ b/paddle/operators/conv_op.h @@ -62,25 +62,12 @@ class ConvOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; - framework::OpKernelType GetExpectedKernelType( - const framework::OpKernelType& kernel) const override { - return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), - kernel.data_layout_, - framework::LibraryType::kCUDNN); - } }; class ConvOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override; - - framework::OpKernelType GetExpectedKernelType( - const framework::OpKernelType& kernel) const override { - return framework::OpKernelType(kernel.data_type_, platform::CUDAPlace(0), - kernel.data_layout_, - framework::LibraryType::kCUDNN); - } }; template diff --git a/paddle/operators/crf_decoding_op.cc b/paddle/operators/crf_decoding_op.cc index 024e1d061..30626028c 100644 --- a/paddle/operators/crf_decoding_op.cc +++ b/paddle/operators/crf_decoding_op.cc @@ -120,17 +120,11 @@ class CRFDecodingOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( 
framework::ToDataType(ctx.Input("Emission")->type()), - ctx.device_context()); - } - - framework::OpKernelType GetExpectedKernelType( - const framework::OpKernelType& actual_kernel_type) const override { - return framework::OpKernelType(actual_kernel_type.data_type_, - platform::CPUPlace()); + platform::CPUPlace()); } }; } // namespace operators diff --git a/paddle/operators/crf_decoding_op.h b/paddle/operators/crf_decoding_op.h index f6827b7b1..ce2f4e662 100644 --- a/paddle/operators/crf_decoding_op.h +++ b/paddle/operators/crf_decoding_op.h @@ -28,9 +28,6 @@ template class CRFDecodingOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()), - "The crf_decoding operator can only run on CPU."); - auto* emission_weights = ctx.Input("Emission"); auto* transition_weights = ctx.Input("Transition"); auto* label = ctx.Input("Label"); diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index fe39cb481..7abd5b1c6 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -51,7 +51,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -101,7 +101,7 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of cross_entropy // is determined by its input "X". 
- framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/fill_constant_batch_size_like_op.cc b/paddle/operators/fill_constant_batch_size_like_op.cc index 852ecdfe4..c74a5b6ce 100644 --- a/paddle/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/operators/fill_constant_batch_size_like_op.cc @@ -49,7 +49,7 @@ class FillConstantBatchSizeLikeOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 45e9d8df7..597fdad07 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -40,7 +40,7 @@ class GatherOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -57,7 +57,7 @@ class GatherGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 9ed493a7d..2dca05760 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -60,7 +60,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/linear_chain_crf_op.cc b/paddle/operators/linear_chain_crf_op.cc index 666207ea0..975e394c7 100644 --- a/paddle/operators/linear_chain_crf_op.cc +++ b/paddle/operators/linear_chain_crf_op.cc @@ -183,7 +183,7 @@ class LinearChainCRFOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of computation kernel of linear_chain_crf // is determined by its input "Emission". - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Emission")->type()), @@ -242,7 +242,7 @@ class LinearChainCRFGradOp : public framework::OperatorWithKernel { protected: // Explicitly set that the data type of output of the linear_chain_crf_grad // operator is determined by its input: gradients of LogLikelihood. 
- framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType( diff --git a/paddle/operators/lod_reset_op.cc b/paddle/operators/lod_reset_op.cc index f3c0badf2..3d7b15edc 100644 --- a/paddle/operators/lod_reset_op.cc +++ b/paddle/operators/lod_reset_op.cc @@ -38,7 +38,7 @@ class LoDResetOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -97,7 +97,7 @@ class LoDResetGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/logical_op.cc b/paddle/operators/logical_op.cc index 741719247..fedd325cf 100644 --- a/paddle/operators/logical_op.cc +++ b/paddle/operators/logical_op.cc @@ -99,9 +99,9 @@ class LogicalOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - framework::OpKernelType kt = OperatorWithKernel::GetActualKernelType(ctx); + framework::OpKernelType kt = OperatorWithKernel::GetExpectedKernelType(ctx); // LogicalOp kernel's device type is decided by input tensor place kt.place_ = ctx.Input("X")->place(); return kt; diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index 6e5cbd6f8..bb03def43 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -41,7 +41,7 @@ class LookupTableOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("W")->type()), @@ -98,7 +98,7 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("W")->type()), diff --git a/paddle/operators/lstm_op.cc b/paddle/operators/lstm_op.cc index b8fcec0f2..3b90b64b4 100644 --- a/paddle/operators/lstm_op.cc +++ b/paddle/operators/lstm_op.cc @@ -92,7 +92,7 @@ class LSTMOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), @@ -260,7 +260,7 @@ class LSTMGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), diff --git a/paddle/operators/multiplex_op.cc 
b/paddle/operators/multiplex_op.cc index 11e047b5d..78263da2f 100644 --- a/paddle/operators/multiplex_op.cc +++ b/paddle/operators/multiplex_op.cc @@ -51,7 +51,7 @@ class MultiplexOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.MultiInput("X")[0]->type()), @@ -102,7 +102,7 @@ class MultiplexGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.MultiInput("X")[0]->type()), diff --git a/paddle/operators/nce_op.cc b/paddle/operators/nce_op.cc index d39ca87d5..84ba3ead2 100644 --- a/paddle/operators/nce_op.cc +++ b/paddle/operators/nce_op.cc @@ -63,7 +63,7 @@ class NCEOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), @@ -166,7 +166,7 @@ class NCEOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Input")->type()), diff --git a/paddle/operators/pool_with_index_op.cc b/paddle/operators/pool_with_index_op.cc index 76c512352..1d31d813a 100644 --- a/paddle/operators/pool_with_index_op.cc +++ b/paddle/operators/pool_with_index_op.cc @@ -69,7 +69,7 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -90,7 +90,7 @@ class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc index a6b23c995..5aa5167db 100644 --- a/paddle/operators/positive_negative_pair_op.cc +++ b/paddle/operators/positive_negative_pair_op.cc @@ -85,7 +85,7 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Score")->type()), diff --git a/paddle/operators/precision_recall_op.cc b/paddle/operators/precision_recall_op.cc index c5753147e..f1598d53c 100644 --- a/paddle/operators/precision_recall_op.cc +++ b/paddle/operators/precision_recall_op.cc @@ -80,7 +80,7 @@ class PrecisionRecallOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const 
override { return framework::OpKernelType( framework::ToDataType(ctx.Input("MaxProbs")->type()), diff --git a/paddle/operators/roi_pool_op.cc b/paddle/operators/roi_pool_op.cc index ef1804d97..a7351f11c 100644 --- a/paddle/operators/roi_pool_op.cc +++ b/paddle/operators/roi_pool_op.cc @@ -68,7 +68,7 @@ class ROIPoolOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -89,7 +89,7 @@ class ROIPoolGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 806dccc6c..b65334890 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -49,7 +49,7 @@ class ScatterOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Ref")->type()), @@ -68,7 +68,7 @@ class ScatterGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Ref")->type()), diff --git a/paddle/operators/sequence_pool_op.cc b/paddle/operators/sequence_pool_op.cc index aea98744d..34e1a1259 100644 --- a/paddle/operators/sequence_pool_op.cc +++ b/paddle/operators/sequence_pool_op.cc @@ -107,7 +107,7 @@ class SequencePoolGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/sequence_slice_op.cc b/paddle/operators/sequence_slice_op.cc index 98bd88549..f79106ff0 100644 --- a/paddle/operators/sequence_slice_op.cc +++ b/paddle/operators/sequence_slice_op.cc @@ -48,7 +48,7 @@ class SequenceSliceOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -69,7 +69,7 @@ class SequenceSliceGradOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/operators/softmax_with_cross_entropy_op.cc b/paddle/operators/softmax_with_cross_entropy_op.cc index 41e65b701..7135780c9 100644 --- a/paddle/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/operators/softmax_with_cross_entropy_op.cc @@ -118,7 +118,7 @@ class SoftmaxWithCrossEntropyOp : public framework::OperatorWithKernel { } protected: - 
framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("Logits")->type()), @@ -159,7 +159,7 @@ class SoftmaxWithCrossEntropyOpGrad : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType( diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index b86e82664..a4c08430d 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -53,7 +53,7 @@ class SumOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); if (x_vars[0]->IsType()) { diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index 4d5dd86cb..3a314bdb9 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -63,7 +63,7 @@ class UniformRandomOp : public framework::OperatorWithKernel { } protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( static_cast(ctx.Attr("dtype")), diff --git a/paddle/operators/unpool_op.cc b/paddle/operators/unpool_op.cc index aeed9679b..50cee11a7 100644 --- a/paddle/operators/unpool_op.cc +++ b/paddle/operators/unpool_op.cc @@ -71,7 +71,7 @@ int OutputSize(int input_size, int ksize, int padding, int stride) { class UnpoolOp : public framework::OperatorWithKernel { protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), @@ -110,7 +110,7 @@ class UnpoolOp : public framework::OperatorWithKernel { class UnpoolOpGrad : public framework::OperatorWithKernel { protected: - framework::OpKernelType GetActualKernelType( + framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 249527e3e..f05260cca 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -51,6 +51,18 @@ bool places_are_same_class(const Place &p1, const Place &p2) { return p1.which() == p2.which(); } +bool is_same_place(const Place &p1, const Place &p2) { + if (places_are_same_class(p1, p2)) { + if (is_cpu_place(p1)) { + return true; + } else { + return boost::get(p1) == boost::get(p2); + } + } else { + return false; + } +} + std::ostream &operator<<(std::ostream &os, const Place &p) { detail::PlacePrinter printer(os); boost::apply_visitor(printer, p); diff --git a/paddle/platform/place.h b/paddle/platform/place.h index 76b5c502c..ba32dd3be 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -61,6 +61,7 @@ const CPUPlace default_cpu(); bool is_gpu_place(const Place &); bool is_cpu_place(const Place &); bool places_are_same_class(const Place &, const Place &); +bool is_same_place(const Place &, const Place &); std::ostream &operator<<(std::ostream &, 
const Place &); diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index 8acd470c5..74ca56182 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -4,6 +4,7 @@ import numpy as np import paddle.v2 as paddle import paddle.v2.dataset.conll05 as conll05 import paddle.v2.fluid as fluid +import time word_dict, verb_dict, label_dict = conll05.get_dict() word_dict_len = len(word_dict) @@ -160,7 +161,8 @@ def main(): paddle.reader.shuffle( paddle.dataset.conll05.test(), buf_size=8192), batch_size=BATCH_SIZE) - place = fluid.CPUPlace() + #place = fluid.CPUPlace() + place = fluid.CUDAPlace(0) feeder = fluid.DataFeeder( feed_list=[ word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target @@ -174,6 +176,7 @@ def main(): embedding_param.set( load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) + start_time = time.time() batch_id = 0 for pass_id in xrange(PASS_NUM): chunk_evaluator.reset(exe) @@ -191,6 +194,9 @@ def main(): f1_score) + " pass_precision:" + str( pass_precision) + " pass_recall:" + str(pass_recall) + " pass_f1_score:" + str(pass_f1_score)) + if batch_id != 0: + print("second per batch: " + str((time.time() - start_time) + / batch_id)) # exit early for CI exit(0) -- GitLab
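
Note (illustrative sketch, not part of the patch): the code below shows how a downstream operator is expected to use the two hooks this change settles on, namely GetExpectedKernelType(const ExecutionContext&), which replaces the old GetActualKernelType / GetExpectedKernelType(OpKernelType) pair, and the optional per-variable GetKernelTypeForVar. The operator class (MyOp) and its input name ("X") are hypothetical; the GetKernelTypeForVar body simply restates the framework default, which reports the tensor's current place so that OperatorWithKernel::Run can invoke DataTransform / DeviceTransform into a sub-scope when that place differs from the expected kernel's place.

#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

// Hypothetical operator used only to illustrate the new kernel-selection hooks.
class MyOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    // Shape inference elided; only the kernel-selection hooks are shown here.
  }

  // Pick the kernel from the runtime context (data type of input "X" plus the
  // current device context). After this patch, this is the single required hook.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("X")->type()),
        ctx.device_context());
  }

  // Optional: describe where each input variable currently lives. If the
  // returned type differs from the expected kernel type, the framework copies
  // the tensor across devices (DeviceTransform) into a sub-scope before
  // calling Compute. This body matches the default implementation added in
  // operator.cc and is shown only for clarity.
  framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name, const framework::Tensor& tensor,
      const framework::OpKernelType& expected_kernel_type) const override {
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   tensor.place());
  }
};

}  // namespace operators
}  // namespace paddle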