diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 7d53554358497762b1cd91c39bdd23c5807af2bc..df186637726f60ee1b69cec7291477f3efcd059c 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -16,12 +16,10 @@ function(copy TARGET) foreach(index RANGE ${len}) list(GET copy_lib_SRCS ${index} src) list(GET copy_lib_DSTS ${index} dst) - add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND mkdir -p "${dst}") - if(IS_DIRECTORY ${src}) - add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp -r "${src}" "${dst}") - else() - add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp "${src}" "${dst}") - endif() + add_custom_command(TARGET ${TARGET} PRE_BUILD + COMMAND mkdir -p "${dst}" + COMMAND cp -r "${src}" "${dst}" + COMMENT "copying ${src} -> ${dst}") endforeach() endfunction() @@ -53,11 +51,11 @@ IF(NOT PROTOBUF_FOUND) ENDIF(NOT PROTOBUF_FOUND) # paddle fluid module -set(src_dir "${PADDLE_SOURCE_DIR}/paddle") -set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle") +set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") +set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid") set(module "framework") copy(framework_lib DEPS framework_py_proto - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/framework/framework.pb.h + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ) @@ -69,7 +67,7 @@ copy(memory_lib set(module "inference") copy(inference_lib DEPENDS paddle_fluid_shared - SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/inference/libpaddle_fluid.so + SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.so DSTS ${dst_dir}/${module} ${dst_dir}/${module} ) diff --git a/paddle/fluid/framework/op_registry_test.cc b/paddle/fluid/framework/op_registry_test.cc index bfbb2cfc2c57c705cf42c65825edcc6dea08cf41..2746168f1dda493368b81820bde2f093d06d7b4e 100644 --- a/paddle/fluid/framework/op_registry_test.cc +++ b/paddle/fluid/framework/op_registry_test.cc @@ -25,7 +25,10 @@ namespace framework { class CosineOp : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, const platform::Place& place) const override {} + + private: + void RunImpl(const Scope& scope, + const platform::Place& place) const override {} }; class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { @@ -44,7 +47,10 @@ class CosineOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { class MyTestOp : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, const platform::Place& place) const override {} + + private: + void RunImpl(const Scope& scope, + const platform::Place& place) const override {} }; class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 61529fe38b15fe2a4bfa0d64159994d6b62fb086..8effbf1bc6298bdcc381e2176411a79da134653f 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -64,6 +64,18 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { } } +void OperatorBase::Run(const Scope& scope, const platform::Place& place) { + if (platform::is_gpu_place(place)) { +#ifndef PADDLE_WITH_CUDA + PADDLE_THROW("Cannot run operator on place %s", place); +#else + auto dev_id = boost::get<platform::CUDAPlace>(place).device; + 
platform::SetDeviceId(dev_id); +#endif + } + RunImpl(scope, place); +} + std::string OperatorBase::Input(const std::string& name) const { auto& ins = Inputs(name); PADDLE_ENFORCE_LE(ins.size(), 1UL, @@ -479,8 +491,8 @@ class RuntimeInferShapeContext : public InferShapeContext { const Scope& scope_; }; -void OperatorWithKernel::Run(const Scope& scope, - const platform::Place& place) const { +void OperatorWithKernel::RunImpl(const Scope& scope, + const platform::Place& place) const { RuntimeInferShapeContext infer_shape_ctx(*this, scope); this->InferShape(&infer_shape_ctx); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 52300abeb7df346d610d2363335dc9d3330ee39e..708f87dc8632ac500e1050122c5fd5412071fd22 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -89,8 +89,9 @@ class OperatorBase { std::string DebugString() const { return DebugStringEx(nullptr); } - /// Net will call this function to Run an op. - virtual void Run(const Scope& scope, const platform::Place& place) const = 0; + /// Net will call this interface function to Run an op. + // The implementation should be written in RunImpl. + void Run(const Scope& scope, const platform::Place& place); // FIXME(typhoonzero): this is only used for recv_op to stop event_loop. virtual void Stop() {} @@ -144,6 +145,8 @@ class OperatorBase { private: void GenerateTemporaryNames(); void CheckAllInputOutputSet() const; + virtual void RunImpl(const Scope& scope, + const platform::Place& place) const = 0; }; // Macro for define a clone method. @@ -168,10 +171,13 @@ class OperatorBase { class NOP : public OperatorBase { public: using OperatorBase::OperatorBase; - void Run(const Scope& scope, const platform::Place& place) const override {} std::unique_ptr<OperatorBase> Clone() const override { return std::unique_ptr<OperatorBase>(new NOP(*this)); } + + private: + void RunImpl(const Scope& scope, + const platform::Place& place) const override {} }; class ExecutionContext { @@ -363,8 +369,6 @@ class OperatorWithKernel : public OperatorBase { const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const Scope& scope, const platform::Place& place) const final; - static std::unordered_map<std::string /* op_type */, OpKernelMap>& AllOpKernels() { static std::unordered_map<std::string, OpKernelMap> g_all_op_kernels; @@ -393,6 +397,7 @@ class OperatorWithKernel : public OperatorBase { // indicate kernel DataType by input data. Defaultly all input data must be // same. proto::DataType IndicateDataType(const ExecutionContext& ctx) const; + void RunImpl(const Scope& scope, const platform::Place& place) const final; }; extern bool OpSupportGPU(const std::string& op_type);
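The refactor above follows the non-virtual-interface (NVI) idiom: `Run` becomes a public, non-virtual entry point that centralizes per-call device bookkeeping, and each operator now overrides only the private pure-virtual `RunImpl`. The rest of this patch applies that change mechanically to every operator. A minimal standalone sketch of the pattern (simplified `Place`, no Paddle dependencies, the CUDA branch reduced to a log line; illustrative only, not the patch's actual classes):

```cpp
#include <iostream>

// Minimal sketch of the non-virtual-interface idiom used by the
// OperatorBase change above. Scope is omitted and Place is simplified.
struct Place {
  bool is_gpu;
  int device;
};

class OperatorBase {
 public:
  virtual ~OperatorBase() = default;

  // Public entry point: every call site goes through here, so common
  // per-call work (platform::SetDeviceId in the real patch) runs exactly
  // once, before any operator-specific code.
  void Run(const Place& place) {
    if (place.is_gpu) {
      std::cout << "switching to device " << place.device << "\n";
    }
    RunImpl(place);
  }

 private:
  // Subclasses provide only the body; because the override point is
  // private, no caller can bypass the device setup in Run.
  virtual void RunImpl(const Place& place) const = 0;
};

class NopOp : public OperatorBase {
 private:
  void RunImpl(const Place& place) const override {}
};

int main() {
  NopOp op;
  op.Run(Place{true, 0});  // device setup happens before RunImpl
  return 0;
}
```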
diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index b90f5538bb620275521cdc11bf47b4014b2a66e2..0732ec5afe8738313e1d73c52c5303a2e8b1e96a 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -28,7 +28,10 @@ class OpWithoutKernelTest : public OperatorBase { OpWithoutKernelTest(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs), x(1) {} - void Run(const Scope& scope, const platform::Place& place) const override { + + private: + void RunImpl(const Scope& scope, + const platform::Place& place) const override { ++op_run_num; ASSERT_EQ(static_cast<int>(inputs_.size()), 1); ASSERT_EQ(static_cast<int>(outputs_.size()), 1); @@ -259,8 +262,10 @@ class OperatorClone : public paddle::framework::OperatorBase { const paddle::framework::VariableNameMap& outputs, const paddle::framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const paddle::framework::Scope& scope, - const paddle::platform::Place& place) const override {} + + private: + void RunImpl(const paddle::framework::Scope& scope, + const paddle::platform::Place& place) const override {} }; TEST(Operator, Clone) { diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index bf8e11bd8c047275fe341ead9424d02e98d5d8f4..69464c4cff52400d8a25a692c5df8d2fe06230e4 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -31,8 +31,10 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensorArray>(); auto &rank_table = scope.FindVar(Input("RankTable"))->Get<framework::LoDRankTable>(); diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc index f99f9af4276c0e8928f821ae166d55aed02e8e27..b72e72b12f8a6155b6eb3be1468b8dbc7bd48d4e 100644 --- a/paddle/fluid/operators/assign_op.cc +++ b/paddle/fluid/operators/assign_op.cc @@ -71,8 +71,10 @@ class AssignOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); if (x == nullptr) { return; diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index 7737d4e098ac9a0e56e1db2aee796550e8d71ba3..6d3efcfeb8497a78d56180898e5e3a66e52ff22d 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -55,8 +55,10 @@ class BeamSearchDecodeOp : public framework::OperatorBase { const
framework::VariableNameMap& outputs, const framework::AttributeMap& attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& dev_ctx = *pool.Get(dev_place); diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index 9e2a05a60c30e388093aceddd40e58273364c8f9..bfbe78097d2f20ae4c5efa594d17f931c7ea5920 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -204,8 +204,9 @@ class BeamSearchOp : public framework::OperatorBase { PADDLE_THROW("Not Implemented"); } - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { auto ids_var = scope.FindVar(Input("ids")); auto scores_var = scope.FindVar(Input("scores")); auto pre_ids_var = scope.FindVar(Input("pre_ids")); diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h index 878e53058567500aeb9fe854a1a65ed5380572a8..c8a4292932dfaddb4ea73a0d1c8ff6bda02ce1c0 100644 --- a/paddle/fluid/operators/concat_op.h +++ b/paddle/fluid/operators/concat_op.h @@ -38,7 +38,7 @@ class ConcatKernel : public framework::OpKernel<T> { auto in_stride = framework::stride_numel(in->dims()); StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>() + output_offset, out_stride, - in->data<T>(), in_stride); + in->data<T>(), in_stride, in_stride[axis]); output_offset += in_stride[axis]; } } @@ -59,7 +59,7 @@ class ConcatGradKernel : public framework::OpKernel<T> { auto out_stride = framework::stride_numel(out->dims()); StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(), out_stride, in->data<T>() + input_offset, - in_stride); + in_stride, out_stride[axis]); input_offset += out_stride[axis]; } } diff --git a/paddle/fluid/operators/cond_op.cc b/paddle/fluid/operators/cond_op.cc index dd93790d5b52a2ccc8358a94f7ead346d384f191..d63748a61cec0f10269e05bcef3bb0d10345000d 100644 --- a/paddle/fluid/operators/cond_op.cc +++ b/paddle/fluid/operators/cond_op.cc @@ -193,7 +193,7 @@ void CondOp::MergeDataFromSubnet(const framework::Scope& scope, } } -void CondOp::Run(const Scope& scope, const platform::Place& place) const { +void CondOp::RunImpl(const Scope& scope, const platform::Place& place) const { // get device context from pool platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& dev_ctx = *pool.Get(place); diff --git a/paddle/fluid/operators/cond_op.h b/paddle/fluid/operators/cond_op.h index 695af4490696b29d2d47f5825ebc0159b39663c0..0bb14bc8c2cfabeeb13e1e1afd51b034742b74f0 100644 --- a/paddle/fluid/operators/cond_op.h +++ b/paddle/fluid/operators/cond_op.h @@ -77,8 +77,9 @@ class CondOp : public framework::OperatorBase { sub_net_op_[FALSE_BRANCH] = std::move(net); } - void Run(const framework::Scope& scope, - const platform::Place& place) const override; + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& place) const override; private: const int TRUE_BRANCH = 0; diff --git a/paddle/fluid/operators/conditional_block_op.cc b/paddle/fluid/operators/conditional_block_op.cc index 
30435c6cca0a4fb1d41dce47b8fefeafb6c48a51..228b0998360550348fdd30c842a394e8f8ce5935 100644 --- a/paddle/fluid/operators/conditional_block_op.cc +++ b/paddle/fluid/operators/conditional_block_op.cc @@ -65,8 +65,10 @@ class ConditionalBlockOp : public ConditionalOp { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : ConditionalOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto xs = InputTensors(scope); bool need_run; @@ -128,8 +130,10 @@ class ConditionalBlockGradOp : public ConditionalOp { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : ConditionalOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto xs = this->InputTensors(scope); bool need_run; diff --git a/paddle/fluid/operators/create_reader_op.cc b/paddle/fluid/operators/create_reader_op.cc index d1ba51f2c0f13a1b6e4d7ccb93c912703a0b1d86..1393f1a66baaf3b53f797aa61fd42ac3cf54f8db 100644 --- a/paddle/fluid/operators/create_reader_op.cc +++ b/paddle/fluid/operators/create_reader_op.cc @@ -106,8 +106,10 @@ template <typename T> class CreateRandomDataGeneratorOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { const auto& shape_concat = Attr<std::vector<int>>("shape_concat"); const auto& ranks = Attr<std::vector<int>>("ranks"); PADDLE_ENFORCE(!shape_concat.empty() && !ranks.empty()); @@ -155,8 +157,10 @@ class CreateRandomDataGeneratorOpMaker class CreateShuffleReaderOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get<framework::ReaderHolder>(); auto* out = scope.FindVar(Output("Out")) @@ -187,8 +191,10 @@ class CreateShuffleReaderOpMaker : public framework::OpProtoAndCheckerMaker { class CreateBatchReaderOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get<framework::ReaderHolder>(); auto* out = scope.FindVar(Output("Out")) diff --git a/paddle/fluid/operators/feed_op.cc b/paddle/fluid/operators/feed_op.cc index 0b3f5f0d1d09a932e15936285f5cb226daa86e95..41fa69a0972ef8ad528f2a04b0260c40155ffd3e 100644 --- a/paddle/fluid/operators/feed_op.cc +++ b/paddle/fluid/operators/feed_op.cc @@ -24,8 +24,10 @@ class FeedOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - 
const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto feed_var_name = Input("X"); auto *feed_var = scope.FindVar(feed_var_name); diff --git a/paddle/fluid/operators/fetch_op.cc b/paddle/fluid/operators/fetch_op.cc index 54e5892016cdb01f50189147a7453b868c5a48c0..6cb5565013dcacac33e828386f1ea8909e831c1a 100644 --- a/paddle/fluid/operators/fetch_op.cc +++ b/paddle/fluid/operators/fetch_op.cc @@ -26,8 +26,9 @@ class FetchOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto fetch_var_name = Input("X"); auto *fetch_var = scope.FindVar(fetch_var_name); PADDLE_ENFORCE(fetch_var != nullptr, diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index d4bf6406e5716a6b65a234d1cd642b64dcc5726f..6dd58d28db23ff3de8a27e898a9b539787d08718 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -33,8 +33,10 @@ class FillConstantInferShape : public framework::InferShapeBase { class FillConstantOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto data_type = static_cast<framework::proto::DataType>(Attr<int>("dtype")); auto value = Attr<float>("value"); diff --git a/paddle/fluid/operators/fill_op.cc b/paddle/fluid/operators/fill_op.cc index 8e318f37cf0bc945597b5aa7b384e53038c97786..0b97c9c2827ac1be4e99c647dbedc2d9b8730e41 100644 --- a/paddle/fluid/operators/fill_op.cc +++ b/paddle/fluid/operators/fill_op.cc @@ -42,8 +42,10 @@ class FillOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto &out = detail::Ref(detail::Ref(scope.FindVar(Output("Out")), "Cannot find variable %s", Output("Out")) diff --git a/paddle/fluid/operators/get_places_op.cc b/paddle/fluid/operators/get_places_op.cc index ba908e472bbc165a244d8543713f1dbf293abb48..ef635048bd4faa2dc0067152f5f7472acbfe47af 100644 --- a/paddle/fluid/operators/get_places_op.cc +++ b/paddle/fluid/operators/get_places_op.cc @@ -37,8 +37,10 @@ class GetPlacesOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { bool is_gpu; if (Attr<std::string>("device_type") == "AUTO") { is_gpu = platform::is_gpu_place(place); diff --git a/paddle/fluid/operators/increment_op.cc b/paddle/fluid/operators/increment_op.cc index 3d488067b254c37515c6bdb9a4589aad311f344f..de4949584b7b20bec7b31f2ad1b69053ee9ffc0f 100644 --- 
a/paddle/fluid/operators/increment_op.cc +++ b/paddle/fluid/operators/increment_op.cc @@ -51,8 +51,9 @@ class IncrementOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto &out = *scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>(); diff --git a/paddle/fluid/operators/is_empty_op.cc b/paddle/fluid/operators/is_empty_op.cc index ea424018d66dac85d5a4ad75cbf5199064d52848..dac8505e3f2cb33b35b6184184e4762078a19c49 100644 --- a/paddle/fluid/operators/is_empty_op.cc +++ b/paddle/fluid/operators/is_empty_op.cc @@ -28,8 +28,9 @@ class IsEmptyOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { // get input auto *var = scope.FindVar(Input(kInput)); PADDLE_ENFORCE_NOT_NULL(var); diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index 1948063d886b79964b1a52d9d82a8e7d2fb0d493..d043702ebae627951927f2dbec893d40f77f0c73 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -26,8 +26,10 @@ class LoadCombineOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto filename = Attr<std::string>("file_path"); std::ifstream fin(filename); diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index c9bf5d72b234f96d9eb5a4c275737ac8c18cd63d..9393cccfc66ec930db6ef68bd6f3c5065ceea80e 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -25,8 +25,10 @@ class LoadOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto filename = Attr<std::string>("file_path"); std::ifstream fin(filename); PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s for load op", diff --git a/paddle/fluid/operators/lod_array_length_op.cc b/paddle/fluid/operators/lod_array_length_op.cc index f11f5a89f5ad5b2f3deed905625aefa1e9d9935b..daa57c20450f1f92cb0bb500e37d0d8c49c05758 100644 --- a/paddle/fluid/operators/lod_array_length_op.cc +++ b/paddle/fluid/operators/lod_array_length_op.cc @@ -25,8 +25,10 @@ class LoDArrayLengthOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const 
platform::Place &place) const override { auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensorArray>(); auto &out = *scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>(); diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index 0b9426a9f8f0b0b3082667dc7a1414aceb824aca..3264766d6b693244f8dbfa6462b9c7aa13d5b5ec 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -23,8 +23,10 @@ class LoDRankTableOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto *out = scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>(); diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index edc32bcec1441e50e24612789727db9a044cde54..d6e24dc976a1ebe2afa182618d09839b105381c1 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -32,8 +32,10 @@ class LoDTensorToArrayOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto &x = detail::Ref(scope.FindVar(Input("X")), "Cannot find input %s", Input("X")) .Get<framework::LoDTensor>(); diff --git a/paddle/fluid/operators/max_sequence_len_op.cc b/paddle/fluid/operators/max_sequence_len_op.cc index eff8b927e52c94a4e19bb10c644cbaa34a7a0581..cef0dc307dbe97473e9041f51c25eca7cc9a0f1a 100644 --- a/paddle/fluid/operators/max_sequence_len_op.cc +++ b/paddle/fluid/operators/max_sequence_len_op.cc @@ -27,8 +27,9 @@ class MaxSeqenceLenOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto &rank_table = scope.FindVar(Input("RankTable"))->Get<framework::LoDRankTable>(); auto *out = diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 255f55334093213df867852e4d222f0e227e8c5d..88e67b6b86a3731cc2caf5529aa4892c6d605a86 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -27,8 +27,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(dev_place); diff --git 
a/paddle/fluid/operators/mine_hard_examples_op.cc b/paddle/fluid/operators/mine_hard_examples_op.cc index 73a6c0b679310ac4108a915836b5ed497853b38b..540cf867418ec4378e5b97a343b9dcc85604f50c 100644 --- a/paddle/fluid/operators/mine_hard_examples_op.cc +++ b/paddle/fluid/operators/mine_hard_examples_op.cc @@ -237,6 +237,8 @@ class MineHardExamplesOp : public framework::OperatorWithKernel { } ctx->SetOutputDim("UpdatedMatchIndices", idx_dims); + // The first dimension of NegIndices will be set correctly in Compute. + ctx->SetOutputDim("NegIndices", {-1, 1}); } protected: diff --git a/paddle/fluid/operators/nccl_op.cc b/paddle/fluid/operators/nccl_op.cc index 52420ceba0de0323dae000aa301ce7838b3311b6..703e8dd00fc8e613344db11065d6a45afa2a0cc8 100644 --- a/paddle/fluid/operators/nccl_op.cc +++ b/paddle/fluid/operators/nccl_op.cc @@ -26,8 +26,9 @@ class NCCLInitOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { const auto &name = Output("Communicator"); PADDLE_ENFORCE_NOT_NULL(scope.FindVar(name), "Can not find variable '%s' in the scope.", name); diff --git a/paddle/fluid/operators/net_op.h b/paddle/fluid/operators/net_op.h index 14e5909851c4ac08b5f59c5c193c801827b91234..479ba386a70adaff09ae31e24c449fc18a9853b1 100644 --- a/paddle/fluid/operators/net_op.h +++ b/paddle/fluid/operators/net_op.h @@ -57,20 +57,6 @@ class NetOp : public framework::OperatorBase { this->CompleteAddOp(); } - /** - * @brief Run the network. - * - * Run all the operators with the `scope`, if no scope is provided, default - * scope will be used instead. If no OpContext is provicded, default context - * will be used. - */ - void Run(const framework::Scope& scope, - const platform::Place& place) const override { - for (auto& op : ops_) { - op->Run(scope, place); - } - } - bool SupportGPU() const override { for (auto& op : ops_) { if (!op->SupportGPU()) { @@ -117,6 +103,20 @@ class NetOp : public framework::OperatorBase { std::vector<std::unique_ptr<framework::OperatorBase>> ops_; private: + /** + * @brief Run the network. + * + * Run all the operators with the `scope`, if no scope is provided, default + * scope will be used instead. If no OpContext is provided, default context + * will be used.
+ */ + void RunImpl(const framework::Scope& scope, + const platform::Place& place) const override { + for (auto& op : ops_) { + op->Run(scope, place); + } + } + bool add_op_done_{false}; std::set<std::string> intermediate_outputs_; diff --git a/paddle/fluid/operators/net_op_test.cc b/paddle/fluid/operators/net_op_test.cc index cc20be0c81763abe2adcf09de858ce51e16d77a6..265f15e82ed29824ed65917dbe45e5edf9dc8993 100644 --- a/paddle/fluid/operators/net_op_test.cc +++ b/paddle/fluid/operators/net_op_test.cc @@ -26,7 +26,10 @@ class TestOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; DEFINE_OP_CLONE_METHOD(TestOp); - void Run(const Scope& scope, const platform::Place& place) const override { + + private: + void RunImpl(const Scope& scope, + const platform::Place& place) const override { ++run_cnt; } }; diff --git a/paddle/fluid/operators/parallel_do_op.cc b/paddle/fluid/operators/parallel_do_op.cc index e25df92479943d210d98f02374f377f778f43d2c..d791d11172869d42b08c059b900e729bcc9b5d96 100644 --- a/paddle/fluid/operators/parallel_do_op.cc +++ b/paddle/fluid/operators/parallel_do_op.cc @@ -118,8 +118,9 @@ class ParallelDoOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); @@ -207,8 +208,9 @@ class ParallelDoGradOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *block = Attr<framework::BlockDesc *>(kParallelBlock); auto *program = block->Program(); diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index 3616545309e8c279f61a22e571a5e71335c47f93..4d12fdbb6b62d1d7095d10aa6f33d12598a8e99e 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -130,8 +130,9 @@ class TensorPrintOp : public framework::OperatorBase { PADDLE_THROW("Not implemented."); } - void Run(const framework::Scope& scope, - const platform::Place& place) const override { + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& place) const override { const framework::Variable* in_var_ptr = nullptr; std::string phase = kForward; std::string printed_var_name = ""; diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc index 4d562c291911f54c9d1e8fed2e84035808bffbb7..127df82ff13b89de42e45113a21d6f5e7c2f20ed 100644 --- a/paddle/fluid/operators/read_op.cc +++ b/paddle/fluid/operators/read_op.cc @@ -54,8 +54,10 @@ class ReadInferVarType : public framework::VarTypeInference { class ReadOp : public framework::OperatorBase { public: using framework::OperatorBase::OperatorBase; - void Run(const framework::Scope& scope, - const platform::Place& dev_place) const override { + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { framework::ReaderHolder* reader = scope.FindVar(Input("Reader"))->GetMutable<framework::ReaderHolder>(); if 
(!reader->HasNext()) { diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index e4b9b8dab9b0394752d538aa5f59be3c06d0188f..33a744a5b7fef5802569a305d18746f04ed88136 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -226,8 +226,9 @@ class RecurrentOp : public RecurrentBase { const framework::AttributeMap &attrs) : RecurrentBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope)); VLOG(3) << "Static RNN input sequence length = " << seq_len; StepScopes scopes = CreateStepScopes(scope, seq_len); @@ -315,8 +316,9 @@ class RecurrentGradOp : public RecurrentBase { const framework::AttributeMap &attrs) : RecurrentBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto seq_len = static_cast<size_t>(GetSequenceLength(scope)); StepScopes scopes = CreateStepScopes(scope, seq_len); auto reverse = Attr<bool>(kReverse); diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index 148a65bb4b7fe599f2fdb833c179665e58fe1c41..79ba9e543b892d051995d4bafb0ceaaf09838cd2 100644 --- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -75,8 +75,10 @@ class ReorderLoDTensorByRankTableBase : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto &x = detail::Ref(scope.FindVar(Input("X")), "Cannot find input lod tensor variable %s", Input("X")) diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc index 504456c4b069f81319893ae51f57503f5025761a..e9329a0e7e279e2bdd3c45986580c87aa5d0b1fe 100644 --- a/paddle/fluid/operators/rnn_memory_helper_op.cc +++ b/paddle/fluid/operators/rnn_memory_helper_op.cc @@ -24,8 +24,10 @@ class RNNMemoryHelperOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto mem_var_name = Input("X"); auto *mem_var = scope.FindVar(mem_var_name); PADDLE_ENFORCE(mem_var != nullptr, @@ -76,8 +78,10 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto out_grad_var_name = Input(framework::GradVarName("Out")); auto *out_grad_var = 
scope.FindVar(out_grad_var_name); diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc index c23de9073ef965b989e98936b2dd07fc6bce7fdc..e3953e4b08082c08e1bbf77a834d4a895b327f83 100644 --- a/paddle/fluid/operators/save_combine_op.cc +++ b/paddle/fluid/operators/save_combine_op.cc @@ -63,8 +63,10 @@ class SaveCombineOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto filename = Attr<std::string>("file_path"); auto overwrite = Attr<bool>("overwrite"); diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index 483cdfa4c3b9e3b9abd3f32bc5e6e5e0b493bd23..85ba8e01182c2cd01aa599ddbce68b6b2d9aa5f4 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -62,8 +62,10 @@ class SaveOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto filename = Attr<std::string>("file_path"); auto overwrite = Attr<bool>("overwrite"); diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index df50a324fde1637f1f9f64a0b0d4eff8ba3f26d2..7fe0526381d1fc18ad0552c321875af42df0f6dc 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -27,8 +27,9 @@ class ShrinkRNNMemoryOp : public ArrayOp { const framework::AttributeMap &attrs) : ArrayOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *x_var = scope.FindVar(Input("X")); PADDLE_ENFORCE(x_var != nullptr, "Input X must be set"); auto &x_tensor = x_var->Get<framework::LoDTensor>(); @@ -108,8 +109,9 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { const framework::AttributeMap &attrs) : ArrayOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *dout_var = scope.FindVar(Input(framework::GradVarName("Out"))); auto *dx_var = scope.FindVar(Output(framework::GradVarName("X"))); PADDLE_ENFORCE(dx_var != nullptr, "Input Gradient should not be nullptr"); diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index f821dc54d7bbe697d3642e64dc1628ec7d966592..f9600d99a36f59feddfbb5295b8b21ca6d5034cd 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -33,8 +33,10 @@ class SplitLoDTensorOp : public framework::OperatorBase { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + + private: + void 
RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>(); auto *out_true = diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index 06bcf82620bec57346c30b029d23ad8417252248..54420e1bf6ec982545715dc847b0b3e138cf2045 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -38,7 +38,7 @@ class SplitOpKernel : public framework::OpKernel<T> { auto out_stride = framework::stride_numel(out->dims()); StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(), out_stride, in->data<T>() + input_offset, - in_stride); + in_stride, out_stride[axis]); input_offset += out_stride[axis]; } } diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index 385124305e2d9afd62313ca46178b4916cd6405d..4c7b90693a2f9ba62d9c30bb601ea4aaebeaf4b5 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -54,7 +54,8 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, int64_t axis, T* dst, const framework::DDim& dst_stride_numel, const T* src, - const framework::DDim& src_stride_numel) { + const framework::DDim& src_stride_numel, + int64_t size) { int64_t before = dst_stride_numel[0] / dst_stride_numel[axis]; int64_t src_after = src_stride_numel[axis]; int64_t dst_after = dst_stride_numel[axis]; @@ -82,15 +83,14 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, if (platform::is_cpu_place(place)) { auto& cpu_place = boost::get<platform::CPUPlace>(place); memory::Copy(cpu_place, dst + i * dst_after, cpu_place, - src + i * src_after, sizeof(T) * src_after); + src + i * src_after, sizeof(T) * size); } else { #ifdef PADDLE_WITH_CUDA auto& gpu_place = boost::get<platform::CUDAPlace>(place); auto& cuda_ctx = reinterpret_cast<const platform::CUDADeviceContext&>(ctx); memory::Copy(gpu_place, dst + i * dst_after, gpu_place, - src + i * src_after, sizeof(T) * src_after, - cuda_ctx.stream()); + src + i * src_after, sizeof(T) * size, cuda_ctx.stream()); #else PADDLE_THROW("Paddle is not compiled with GPU"); #endif
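The extra `size` argument added to StridedNumelCopyWithAxis above exists because the copy extent along the concat/split axis can no longer be inferred from the source stride alone: when the destination block along the axis is narrower than the source row (the split and concat-grad cases), copying `src_after` elements per row would read past the intended block. A simplified CPU-only sketch of the same idea (hypothetical names, plain pointers instead of DDim/DeviceContext; illustrative, not the real kernel):

```cpp
#include <cstring>
#include <iostream>
#include <vector>

// Why the explicit `size` parameter matters: when splitting a [2 x 6]
// row-major tensor into two [2 x 3] halves along axis 1, the source row
// stride (6) and the destination row stride (3) differ, so the number of
// elements to copy per row must be passed in explicitly.
void StridedCopyAxis1(const float* src, int src_cols, float* dst, int dst_cols,
                      int rows, int size /* elements copied per row */) {
  for (int i = 0; i < rows; ++i) {
    // Copy `size` elements, not src_cols: using the source stride here
    // would read past the intended block and overflow the destination.
    std::memcpy(dst + i * dst_cols, src + i * src_cols, sizeof(float) * size);
  }
}

int main() {
  const std::vector<float> src = {0, 1, 2, 3, 4, 5,      // row 0
                                  6, 7, 8, 9, 10, 11};   // row 1
  std::vector<float> right(2 * 3, 0.f);
  // Take columns 3..5 of each row: source offset 3, copy 3 per row.
  StridedCopyAxis1(src.data() + 3, 6, right.data(), 3, 2, 3);
  for (float v : right) std::cout << v << " ";  // prints: 3 4 5 9 10 11
  return 0;
}
```

This mirrors the call-site changes in concat_op.h and split_op.h, which pass `in_stride[axis]` and `out_stride[axis]` respectively as the per-row extent.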
diff --git a/paddle/fluid/operators/tensor_array_read_write_op.cc b/paddle/fluid/operators/tensor_array_read_write_op.cc index 50811fb22491598849216f41a584ae0b68f8f306..704ee964c908c44d84316985429a6551b770e33f 100644 --- a/paddle/fluid/operators/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/tensor_array_read_write_op.cc @@ -24,8 +24,9 @@ class WriteToArrayOp : public ArrayOp { const framework::AttributeMap &attrs) : ArrayOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); if (x == nullptr) return; auto &x_tensor = x->Get<framework::LoDTensor>(); @@ -122,8 +123,10 @@ class ReadFromArrayOp : public ArrayOp { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : ArrayOp(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &place) const override { + + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &place) const override { auto *x = scope.FindVar(Input("X")); PADDLE_ENFORCE(x != nullptr, "X must be set"); auto &x_array = x->Get<framework::LoDTensorArray>(); diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc index d254c572acff52d967e551c377b3b32b05c92973..a7a05cc5f79da6c1e6945a83f997e54041d2045d 100644 --- a/paddle/fluid/operators/while_op.cc +++ b/paddle/fluid/operators/while_op.cc @@ -39,8 +39,9 @@ class WhileOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { PADDLE_ENFORCE_NOT_NULL(scope.FindVar(Input(kCondition))); auto &cond = scope.FindVar(Input(kCondition))->Get<LoDTensor>(); PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1})); @@ -99,8 +100,9 @@ class WhileGradOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : framework::OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::Place &dev_place) const override { + private: + void RunImpl(const framework::Scope &scope, + const platform::Place &dev_place) const override { // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(dev_place); diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 1486d5ed2579a49a4722a8b0abdfdba6bf196615..442a7ea883052e73a5d50d5558f57732be93fb3a 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -204,6 +204,17 @@ function gen_capi_package() { fi } +function gen_fluid_inference_lib() { + if [ ${WITH_C_API:-OFF} == "OFF" ] ; then + cat <<EOF + ======================================== + Building fluid inference library ... + ======================================== +EOF + make inference_lib_dist + fi +} + set -xe cmake_gen ${PYTHON_ABI:-""} @@ -212,6 +223,7 @@ run_test gen_docs gen_dockerfile gen_capi_package +gen_fluid_inference_lib if [[ ${WITH_C_API:-OFF} == "ON" ]]; then printf "PaddlePaddle C-API libraries was generated on build/paddle.tgz\n" diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py index e4675e24b178b2f1745c2b38270ac381ebfe6550..689920af0c4fb85d11c3492d83da2d22d9c4fa6e 100644 --- a/python/paddle/v2/fluid/distribute_transpiler.py +++ b/python/paddle/v2/fluid/distribute_transpiler.py @@ -121,6 +121,7 @@ def split_dense_variable(var_list, block_size += dim1 - remains # update split_count after aligning split_count = int(math.ceil(var_numel / float(block_size))) + print("###split var ", var.name, var.shape, block_size, split_count) for block_id in xrange(split_count): curr_block_size = min(block_size, var_numel - ( (block_id) * block_size)) @@ -191,7 +192,6 @@ class DistributeTranspiler: for b in param_blocks: varname, block_id, _ = b.split(":") send_outputs.append(param_var_mapping[varname][int(block_id)]) - # let send_op know which endpoint to send which var to, eplist has the same # order as send_inputs.
eplist = split_method(send_inputs, pserver_endpoints) @@ -230,21 +230,6 @@ class DistributeTranspiler: outputs={"Out": [orig_param]}, attrs={"axis": 0}) - self.lr_param_mapping = self._create_lr_param_mapping() - - def _create_lr_param_mapping(self): - lr_mapping = dict() - for _, opt_op in enumerate(self.optimize_ops): - if not opt_op.inputs or not opt_op.inputs.has_key("LearningRate") \ - or not opt_op.inputs.has_key("Param"): - continue - lr = opt_op.inputs["LearningRate"].name - param = opt_op.inputs["Param"].name - if not lr_mapping.has_key(lr): - lr_mapping.update({lr: list()}) - lr_mapping[lr].append(param) - return lr_mapping - def _create_vars_from_blocklist(self, program, block_list): # Create respective variables using the block_list block_map = dict() @@ -271,6 +256,7 @@ class DistributeTranspiler: splited_shape = [rows] if len(orig_shape) >= 2: splited_shape.extend(orig_shape[1:]) + print("###splited: ", size, rows, splited_shape) var = program.global_block().create_var( name="%s.block%d" % (varname, i), psersistable=False, @@ -278,6 +264,7 @@ class DistributeTranspiler: type=orig_var.type, shape=splited_shape) # flattend splited var var_mapping[varname].append(var) + print("###created split var ", var) return var_mapping def _clone_var(self, block, var): @@ -369,18 +356,9 @@ class DistributeTranspiler: pass return orig_shape - def _fetch_var_names(self, param_dict): - res = [] - if not param_dict: - return res - for _, values in param_dict.iteritems(): - if not isinstance(values, list): - values = [values] - res += [v.name for v in values] - return res - def _append_pserver_ops(self, optimize_block, opt_op, endpoint): program = optimize_block.program + pserver_block = program.global_block() new_inputs = dict() # update param/grad shape first, then other inputs like # moment can use the updated shape @@ -395,11 +373,11 @@ class DistributeTranspiler: # do not append this op if current endpoint # is not dealing with this grad block return - merged_var = program.global_block().vars[grad_block.name] + merged_var = pserver_block.vars[grad_block.name] # append merging ops if trainers > 1 if self.trainers > 1: vars2merge = self._create_var_for_trainers( - program.global_block(), grad_block, self.trainers) + pserver_block, grad_block, self.trainers) optimize_block.append_op( type="sum", inputs={"X": vars2merge}, @@ -419,29 +397,27 @@ class DistributeTranspiler: break if not param_block: return - tmpvar = program.global_block().create_var( + tmpvar = pserver_block.create_var( name=param_block.name, persistable=True, dtype=param_block.dtype, shape=param_block.shape) - new_inputs[key] = tmpvar elif key == "LearningRate": # leraning rate variable has already be created by non-optimize op, # don't create it once again. 
- new_inputs[key] = program.global_block().vars[opt_op.input(key)[ - 0]] + new_inputs[key] = pserver_block.vars[opt_op.input(key)[0]] for key in opt_op.input_names: new_shape = None if key in ["Param", "Grad", "LearningRate"]: continue - var = program.global_block().vars[opt_op.input(key)[0]] + var = self.program.global_block().vars[opt_op.input(key)[0]] # update accumulator variable shape param_shape = new_inputs["Param"].shape new_shape = self._get_optimizer_input_shape(opt_op.type, key, var.shape, param_shape) - tmpvar = program.global_block().create_var( + tmpvar = pserver_block.create_var( name=var.name, persistable=var.persistable, dtype=var.dtype, @@ -449,11 +425,14 @@ class DistributeTranspiler: new_inputs[key] = tmpvar # change output's ParamOut variable - opt_op.outputs["ParamOut"] = new_inputs["Param"] + outputs = self._get_output_map_from_op(self.program.global_block().vars, + opt_op) + outputs["ParamOut"] = new_inputs["Param"] + optimize_block.append_op( type=opt_op.type, inputs=new_inputs, - outputs=opt_op.outputs, + outputs=outputs, attrs=opt_op.attrs) def _append_pserver_non_opt_ops(self, optimize_block, opt_op): @@ -497,11 +476,12 @@ class DistributeTranspiler: # If one op's input is another op's output or # one op's output is another op's input, we say # the two operator is connected. - op1_input_names = self._fetch_var_names(op1.inputs) - op1_output_names = self._fetch_var_names(op1.outputs) + op1_input_names = op1.desc.input_arg_names() + op1_output_names = op1.desc.output_arg_names() + + op2_input_names = op2.desc.input_arg_names() + op2_output_names = op2.desc.output_arg_names() - op2_input_names = self._fetch_var_names(op2.inputs) - op2_output_names = self._fetch_var_names(op2.outputs) if set(op1_output_names) & set(op2_input_names) or \ set(op1_input_names) & set(op2_output_names): return True @@ -521,8 +501,8 @@ class DistributeTranspiler: def _is_opt_op(self, op): # NOTE: It's a HACK implement. # optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc... 
- if op.inputs and op.inputs.has_key("Param") \ - and op.inputs.has_key("LearningRate"): + if "Param" in op.input_names and \ + "LearningRate" in op.input_names: return True return False @@ -530,12 +510,12 @@ class DistributeTranspiler: param_names = [ p.name for p in self.param_grad_ep_mapping[endpoint]["params"] ] - if op.inputs["Param"].name in param_names: + if op.input("Param") in param_names: return True else: for n in param_names: - param = op.inputs["Param"].name - if same_or_split_var(n, param) and n != op.inputs["Param"].name: + param = op.input("Param")[0] + if same_or_split_var(n, param) and n != param: return True return False return False @@ -551,6 +531,8 @@ class DistributeTranspiler: """ # step5 pserver_program = Program() + print("param mapping on pserver: #### ", + self.param_grad_ep_mapping[endpoint]["params"]) for v in self.param_grad_ep_mapping[endpoint]["params"]: self._clone_var(pserver_program.global_block(), v) for v in self.param_grad_ep_mapping[endpoint]["grads"]: @@ -564,7 +546,6 @@ class DistributeTranspiler: persistable=True, dtype=v.dtype, shape=v.shape) - # step6 optimize_block = pserver_program.create_block(0) # step 6.1 diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index a517db68c5886fbcbe19e6981aee5bf3971352e4..35d3df785ba4f74ce1681e471e7a83dfdaf71987 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -400,9 +400,6 @@ class Operator(object): """ self.block = block self.desc = desc - # for clone a new operator - self.inputs = inputs - self.outputs = outputs self.attrs = attrs if len(self.desc.type()) != 0: return diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py index cfbbf710b6ac63b9a0fe7d51b0d1940532e948fc..f4fb2ca2798ab8ea8c7c634194d2e0c1371a2b93 100644 --- a/python/paddle/v2/fluid/layers/__init__.py +++ b/python/paddle/v2/fluid/layers/__init__.py @@ -16,8 +16,6 @@ import ops from ops import * import nn from nn import * -import detection -from detection import * import io from io import * import tensor @@ -33,7 +31,6 @@ from detection import * __all__ = [] __all__ += math_op_patch.__all__ -__all__ += detection.__all__ __all__ += nn.__all__ __all__ += io.__all__ __all__ += tensor.__all__ diff --git a/python/paddle/v2/fluid/layers/detection.py b/python/paddle/v2/fluid/layers/detection.py index 6d0f12f47503c9dd4fed6e7eba5001555d3c84ce..6af5c8388b7f51563cf7208b89565c5aea2db71f 100644 --- a/python/paddle/v2/fluid/layers/detection.py +++ b/python/paddle/v2/fluid/layers/detection.py @@ -1,10 +1,10 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -15,20 +15,32 @@ All layers just related to the detection neural network. 
""" +from layer_function_generator import generate_layer_fn from ..layer_helper import LayerHelper -from ..param_attr import ParamAttr -from ..framework import Variable import tensor import ops import nn import math __all__ = [ - 'detection_output', 'prior_box', 'multi_box_head', + 'bipartite_match', + 'target_assign', + 'detection_output', + 'ssd_loss', +] + +__auto__ = [ + 'iou_similarity', + 'box_coder', ] +__all__ += __auto__ + +for _OP in set(__auto__): + globals()[_OP] = generate_layer_fn(_OP) + def detection_output(scores, loc, @@ -98,18 +110,13 @@ def detection_output(scores, """ helper = LayerHelper("detection_output", **locals()) - decoded_box = helper.create_tmp_variable(dtype=loc.dtype) - helper.append_op( - type="box_coder", - inputs={ - 'PriorBox': prior_box, - 'PriorBoxVar': prior_box_var, - 'TargetBox': loc - }, - outputs={'OutputBox': decoded_box}, - attrs={'code_type': 'decode_center_size'}) - nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) + decoded_box = box_coder( + prior_box=prior_box, + prior_box_var=prior_box_var, + target_box=loc, + code_type='decode_center_size') + nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) helper.append_op( type="multiclass_nms", inputs={'Scores': scores, @@ -280,22 +287,22 @@ def prior_box(inputs, if aspect_ratios: _is_list_or_tuple_and_equal( aspect_ratios, num_layer, - 'aspect_ratios should be list and the length of inputs ' + 'aspect_ratios should be list or tuple, and the length of inputs ' 'and aspect_ratios should be the same.') if step_h: _is_list_or_tuple_and_equal( step_h, num_layer, - 'step_h should be list and the length of inputs and ' + 'step_h should be list or tuple, and the length of inputs and ' 'step_h should be the same.') if step_w: _is_list_or_tuple_and_equal( step_w, num_layer, - 'step_w should be list and the length of inputs and ' + 'step_w should be list or tuple, and the length of inputs and ' 'step_w should be the same.') if steps: _is_list_or_tuple_and_equal( steps, num_layer, - 'steps should be list and the length of inputs and ' + 'steps should be list or tuple, and the length of inputs and ' 'step_w should be the same.') step_w = steps step_h = steps @@ -339,6 +346,331 @@ def prior_box(inputs, return box, var +def bipartite_match(dist_matrix, name=None): + """ + **Bipartite matchint operator** + + This operator is a greedy bipartite matching algorithm, which is used to + obtain the matching with the maximum distance based on the input + distance matrix. For input 2D matrix, the bipartite matching algorithm can + find the matched column for each row, also can find the matched row for + each column. And this operator only calculate matched indices from column + to row. For each instance, the number of matched indices is the number of + of columns of the input ditance matrix. + + There are two outputs to save matched indices and distance. + A simple description, this algothrim matched the best (maximum distance) + row entity to the column entity and the matched indices are not duplicated + in each row of ColToRowMatchIndices. If the column entity is not matched + any row entity, set -1 in ColToRowMatchIndices. + + Please note that the input DistMat can be LoDTensor (with LoD) or Tensor. + If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size. + If Tensor, the height of ColToRowMatchIndices is 1. + + Args: + dist_matrix(Variable): This input is a 2-D LoDTensor with shape + [K, M]. 
It is pair-wise distance matrix between the entities + represented by each row and each column. For example, assumed one + entity is A with shape [K], another entity is B with shape [M]. The + dist_matirx[i][j] is the distance between A[i] and B[j]. The bigger + the distance is, the better macthing the pairs are. Please note, + This tensor can contain LoD information to represent a batch of + inputs. One instance of this batch can contain different numbers of + entities. + Returns: + match_indices(Variable): A 2-D Tensor with shape [N, M] in int type. + N is the batch size. If match_indices[i][j] is -1, it + means B[j] does not match any entity in i-th instance. + Otherwise, it means B[j] is matched to row + match_indices[i][j] in i-th instance. The row number of + i-th instance is saved in match_indices[i][j]. + match_distance(Variable): A 2-D Tensor with shape [N, M] in float type. + N is batch size. If match_indices[i][j] is -1, + match_distance[i][j] is also -1.0. Otherwise, assumed + match_distance[i][j] = d, and the row offsets of each instance + are called LoD. Then match_distance[i][j] = dist_matrix[d+LoD[i]][j]. + """ + helper = LayerHelper('bipartite_match', **locals()) + match_indices = helper.create_tmp_variable(dtype='int32') + match_distance = helper.create_tmp_variable(dtype=dist_matrix.dtype) + helper.append_op( + type='bipartite_match', + inputs={'DistMat': dist_matrix}, + outputs={ + 'ColToRowMatchIndices': match_indices, + 'ColToRowMatchDist': match_distance + }) + return match_indices, match_distance + + +def target_assign(input, + matched_indices, + negative_indices=None, + mismatch_value=None, + name=None): + """ + **Target assigner operator** + + This operator can be, for given the target bounding boxes or labels, + to assign classification and regression targets to each prediction as well as + weights to prediction. The weights is used to specify which prediction would + not contribute to training loss. + + For each instance, the output `out` and`out_weight` are assigned based on + `match_indices` and `negative_indices`. + Assumed that the row offset for each instance in `input` is called lod, + this operator assigns classification/regression targets by performing the + following steps: + + 1. Assigning all outpts based on `match_indices`: + + If id = match_indices[i][j] > 0, + + out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] + out_weight[i][j] = 1. + + Otherwise, + + out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} + out_weight[i][j] = 0. + + 2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided: + + Assumed that the row offset for each instance in `neg_indices` is called neg_lod, + for i-th instance and each `id` of neg_indices in this instance: + + out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} + out_weight[i][id] = 1.0 + + Args: + inputs (Variable): This input is a 3D LoDTensor with shape [M, P, K]. + matched_indices (Variable): Tensor<int>), The input matched indices + is 2D Tenosr<int32> with shape [N, P], If MatchIndices[i][j] is -1, + the j-th entity of column is not matched to any entity of row in + i-th instance. + negative_indices (Variable): The input negative example indices are + an optional input with shape [Neg, 1] and int32 type, where Neg is + the total number of negative example indices. + mismatch_value (float32): Fill this value to the mismatched location. 
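+        name (str|None): A name for this layer (optional). If set to None,
+            the layer will be named automatically.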
+
+    Returns:
+        out (Variable): The output is a 3D Tensor with shape [N, P, K],
+            where N and P are the same as they are in `matched_indices`,
+            and K is the same as it is in the input `X`.
+        out_weight (Variable): The weight for the output with the shape of
+            [N, P, 1].
+    """
+    helper = LayerHelper('target_assign', **locals())
+    out = helper.create_tmp_variable(dtype=input.dtype)
+    out_weight = helper.create_tmp_variable(dtype='float32')
+    helper.append_op(
+        type='target_assign',
+        inputs={
+            'X': input,
+            'MatchIndices': matched_indices,
+            'NegIndices': negative_indices
+        },
+        outputs={'Out': out,
+                 'OutWeight': out_weight},
+        attrs={'mismatch_value': mismatch_value})
+    return out, out_weight
+
+
+def ssd_loss(location,
+             confidence,
+             gt_box,
+             gt_label,
+             prior_box,
+             prior_box_var=None,
+             background_label=0,
+             overlap_threshold=0.5,
+             neg_pos_ratio=3.0,
+             neg_overlap=0.5,
+             loc_loss_weight=1.0,
+             conf_loss_weight=1.0,
+             match_type='per_prediction',
+             mining_type='max_negative',
+             sample_size=None):
+    """
+    **Multi-box loss layer for the SSD object detection algorithm**
+
+    This layer computes the detection loss for SSD given the location offset
+    predictions, confidence predictions, prior boxes, ground-truth bounding
+    boxes and labels, and the type of hard example mining. The returned loss
+    is a weighted sum of the localization loss (or regression loss) and the
+    confidence loss (or classification loss), obtained by performing the
+    following steps:
+
+    1. Find matched bounding boxes by the bipartite matching algorithm.
+        1.1 Compute the IOU similarity between ground-truth boxes and prior
+            boxes.
+        1.2 Compute matched bounding boxes by the bipartite matching
+            algorithm.
+    2. Compute the confidence loss for mining hard examples.
+        2.1. Get the target label based on the matched indices.
+        2.2. Compute the confidence loss.
+    3. Apply hard example mining to get the negative example indices and
+       update the matched indices.
+    4. Assign classification and regression targets.
+        4.1. Encode the bboxes according to the prior boxes.
+        4.2. Assign regression targets.
+        4.3. Assign classification targets.
+    5. Compute the overall objective loss.
+        5.1 Compute the confidence loss.
+        5.2 Compute the localization loss.
+        5.3 Compute the overall weighted loss.
+
+    Args:
+        location (Variable): The location predictions are a 3D Tensor with
+            shape [N, Np, 4], N is the batch size, Np is the total number of
+            predictions for each instance. 4 is the number of coordinate
+            values, and the layout is [xmin, ymin, xmax, ymax].
+        confidence (Variable): The confidence predictions are a 3D Tensor
+            with shape [N, Np, C], N and Np are the same as they are in
+            `location`, C is the class number.
+        gt_box (Variable): The ground-truth bounding boxes (bboxes) are a 2D
+            LoDTensor with shape [Ng, 4], Ng is the total number of
+            ground-truth bboxes of the mini-batch input.
+        gt_label (Variable): The ground-truth labels are a 2D LoDTensor
+            with shape [Ng, 1].
+        prior_box (Variable): The prior boxes are a 2D Tensor with shape
+            [Np, 4].
+        prior_box_var (Variable): The variances of the prior boxes are a 2D
+            Tensor with shape [Np, 4].
+        background_label (int): The index of the background label, 0 by
+            default.
+        overlap_threshold (float): If match_type is 'per_prediction', use
+            `overlap_threshold` to determine the extra matching bboxes when
+            finding matched boxes. 0.5 by default.
+        neg_pos_ratio (float): The ratio of negative boxes to positive
+            boxes, used only when mining_type is 'max_negative'. 3.0 by
+            default.
+        neg_overlap (float): The negative overlap upper bound for the
+            unmatched predictions, used only when mining_type is
+            'max_negative'. 0.5 by default.
+        sample_size (int): The max sample size of negative boxes, used only
+            when mining_type is 'hard_example'.
+        loc_loss_weight (float): Weight for the localization loss. 1.0 by
+            default.
+        conf_loss_weight (float): Weight for the confidence loss. 1.0 by
+            default.
+        match_type (str): The type of matching method during training, which
+            should be 'bipartite' or 'per_prediction'.
+        mining_type (str): The hard example mining type, which should be
+            'hard_example' or 'max_negative'; currently only 'max_negative'
+            is supported.
+
+    Returns:
+        Variable: The weighted sum of the localization loss and confidence
+            loss, with shape [N * Np, 1], N and Np are the same as they are
+            in `location`.
+
+    Raises:
+        ValueError: If mining_type is 'hard_example'; currently only the
+            'max_negative' mining type is supported.
+
+    Examples:
+        .. code-block:: python
+
+            pb = layers.data(
+                name='prior_box',
+                shape=[10, 4],
+                append_batch_size=False,
+                dtype='float32')
+            pbv = layers.data(
+                name='prior_box_var',
+                shape=[10, 4],
+                append_batch_size=False,
+                dtype='float32')
+            loc = layers.data(name='target_box', shape=[10, 4], dtype='float32')
+            scores = layers.data(name='scores', shape=[10, 21], dtype='float32')
+            gt_box = layers.data(
+                name='gt_box', shape=[4], lod_level=1, dtype='float32')
+            gt_label = layers.data(
+                name='gt_label', shape=[1], lod_level=1, dtype='float32')
+            loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
+    """
+
+    helper = LayerHelper('ssd_loss', **locals())
+    if mining_type != 'max_negative':
+        raise ValueError("Only mining_type == 'max_negative' is supported now.")
+
+    num, num_prior, num_class = confidence.shape
+
+    def __reshape_to_2d(var):
+        return ops.reshape(x=var, shape=[-1, var.shape[-1]])
+
+    # 1. Find matched bounding boxes by the bipartite matching algorithm.
+    # 1.1 Compute the IOU similarity between ground-truth boxes and prior
+    #     boxes.
+    iou = iou_similarity(x=gt_box, y=prior_box)
+    # 1.2 Compute matched bounding boxes by the bipartite matching algorithm.
+    matched_indices, matched_dist = bipartite_match(iou)
+
+    # 2. Compute the confidence loss for mining hard examples.
+    # 2.1. Get the target label based on the matched indices.
+    gt_label = ops.reshape(x=gt_label, shape=gt_label.shape + (1, ))
+    target_label, _ = target_assign(
+        gt_label, matched_indices, mismatch_value=background_label)
+    # 2.2. Compute the confidence loss.
+    # Reshape confidence to a 2D tensor.
+    confidence = __reshape_to_2d(confidence)
+    target_label = tensor.cast(x=target_label, dtype='int64')
+    target_label = __reshape_to_2d(target_label)
+    conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
+
+    # 3. Mining hard examples.
+    conf_loss = ops.reshape(x=conf_loss, shape=(num, num_prior))
+    neg_indices = helper.create_tmp_variable(dtype='int32')
+    dtype = matched_indices.dtype
+    updated_matched_indices = helper.create_tmp_variable(dtype=dtype)
+    helper.append_op(
+        type='mine_hard_examples',
+        inputs={
+            'ClsLoss': conf_loss,
+            'LocLoss': None,
+            'MatchIndices': matched_indices,
+            'MatchDist': matched_dist,
+        },
+        outputs={
+            'NegIndices': neg_indices,
+            'UpdatedMatchIndices': updated_matched_indices
+        },
+        attrs={
+            'neg_pos_ratio': neg_pos_ratio,
+            'neg_dist_threshold': neg_overlap,
+            'mining_type': mining_type,
+            'sample_size': sample_size,
+        })
+
+    # 4. Assign classification and regression targets.
+    # 4.1. Encode the bboxes according to the prior boxes.
+    encoded_bbox = box_coder(
+        prior_box=prior_box,
+        prior_box_var=prior_box_var,
+        target_box=gt_box,
+        code_type='encode_center_size')
+    # 4.2. Assign regression targets.
+    target_bbox, target_loc_weight = target_assign(
+        encoded_bbox, updated_matched_indices, mismatch_value=background_label)
+    # 4.3. Assign classification targets.
+    target_label, target_conf_weight = target_assign(
+        gt_label,
+        updated_matched_indices,
+        negative_indices=neg_indices,
+        mismatch_value=background_label)
+
+    # 5. Compute loss.
+    # 5.1 Compute the confidence loss.
+    target_label = __reshape_to_2d(target_label)
+    target_label = tensor.cast(x=target_label, dtype='int64')
+    conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
+    target_conf_weight = __reshape_to_2d(target_conf_weight)
+    conf_loss = conf_loss * target_conf_weight
+
+    # 5.2 Compute the regression loss.
+    location = __reshape_to_2d(location)
+    target_bbox = __reshape_to_2d(target_bbox)
+
+    loc_loss = nn.smooth_l1(location, target_bbox)
+    target_loc_weight = __reshape_to_2d(target_loc_weight)
+    loc_loss = loc_loss * target_loc_weight
+
+    # 5.3 Compute the overall weighted loss.
+    loss = conf_loss_weight * conf_loss + loc_loss_weight * loc_loss
+    return loss
+
+
 def multi_box_head(inputs,
                    num_classes,
                    min_sizes=None,
diff --git a/python/paddle/v2/fluid/tests/test_detection.py b/python/paddle/v2/fluid/tests/test_detection.py
index 2f1ecd66775cda506b880a85ebbe7c29e0c0857a..dd28a05313cd38665a6390552fc3247b84949f04 100644
--- a/python/paddle/v2/fluid/tests/test_detection.py
+++ b/python/paddle/v2/fluid/tests/test_detection.py
@@ -15,12 +15,11 @@ from __future__ import print_function
 
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.layers as layers
-import paddle.v2.fluid.layers.detection as detection
 from paddle.v2.fluid.framework import Program, program_guard
 import unittest
 
 
-class TestBook(unittest.TestCase):
+class TestDetection(unittest.TestCase):
     def test_detection_output(self):
         program = Program()
         with program_guard(program):
@@ -47,7 +46,67 @@ class TestBook(unittest.TestCase):
             out = layers.detection_output(
                 scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv)
             self.assertIsNotNone(out)
-            # print(str(program))
+            self.assertEqual(out.shape[-1], 6)
+        print(str(program))
+
+    def test_detection_api(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(name='x', shape=[4], dtype='float32')
+            y = layers.data(name='y', shape=[4], dtype='float32')
+            z = layers.data(name='z', shape=[4], dtype='float32', lod_level=1)
+            iou = layers.iou_similarity(x=x, y=y)
+            bcoder = layers.box_coder(
+                prior_box=x,
+                prior_box_var=y,
+                target_box=z,
+                code_type='encode_center_size')
+            self.assertIsNotNone(iou)
+            self.assertIsNotNone(bcoder)
+
+            matched_indices, matched_dist = layers.bipartite_match(iou)
+            self.assertIsNotNone(matched_indices)
+            self.assertIsNotNone(matched_dist)
+
+            gt = layers.data(
+                name='gt', shape=[1, 1], dtype='int32', lod_level=1)
+            trg, trg_weight = layers.target_assign(
+                gt, matched_indices, mismatch_value=0)
+            self.assertIsNotNone(trg)
+            self.assertIsNotNone(trg_weight)
+
+            gt2 = layers.data(
+                name='gt2', shape=[10, 4], dtype='float32', lod_level=1)
+            trg, trg_weight = layers.target_assign(
+                gt2, matched_indices, mismatch_value=0)
+            self.assertIsNotNone(trg)
+            self.assertIsNotNone(trg_weight)
+
+        print(str(program))
+
+    def test_ssd_loss(self):
+        program = Program()
+        with program_guard(program):
+            pb = layers.data(
+                name='prior_box',
+                shape=[10, 4],
+                append_batch_size=False,
+                dtype='float32')
+            pbv = layers.data(
+                name='prior_box_var',
+                shape=[10, 4],
+                append_batch_size=False,
+                dtype='float32')
+            loc = layers.data(name='target_box', shape=[10, 4], dtype='float32')
+            scores = layers.data(name='scores', shape=[10, 21], dtype='float32')
+            gt_box = layers.data(
+                name='gt_box', shape=[4], lod_level=1, dtype='float32')
+            gt_label = layers.data(
+                name='gt_label', shape=[1], lod_level=1, dtype='int32')
+            loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv)
+            self.assertIsNotNone(loss)
+            self.assertEqual(loss.shape[-1], 1)
+        print(str(program))
 
 
 class TestPriorBox(unittest.TestCase):
@@ -68,7 +127,7 @@ class TestPriorBox(unittest.TestCase):
         conv4 = fluid.layers.conv2d(conv3, 3, 3, 2)
         conv5 = fluid.layers.conv2d(conv4, 3, 3, 2)
 
-        box, var = detection.prior_box(
+        box, var = layers.prior_box(
            inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
            image=images,
            min_ratio=20,