diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 25f0ba418433571343c5b2bbfdbf9fb940eaec52..c99406799ba5f664c4b9f80e0567b293e4ffea51 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -80,7 +80,6 @@ message OpProto { optional bool duplicable = 3 [ default = false ]; optional bool intermediate = 4 [ default = false ]; optional bool dispensable = 5 [ default = false ]; - optional string reuse = 6; } // AttrProto describes the C++ type Attribute. diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index df2a7a27ca4a6011b214202ac9bf4f30dc482ece..152fc3361a733b906765f3206089c5252658c213 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -21,7 +21,6 @@ namespace framework { void OpProtoAndCheckerMaker::Validate() { validated_ = true; CheckNoDuplicatedInOutAttrs(); - CheckReuseVars(); } OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddInput( @@ -40,40 +39,6 @@ OpProtoAndCheckerMaker::VariableBuilder OpProtoAndCheckerMaker::AddOutput( return OpProtoAndCheckerMaker::VariableBuilder{output}; } -void OpProtoAndCheckerMaker::Reuse(const std::string& name, - const std::string& reused_name) { - bool found = false; - proto::OpProto::Var* var; - - for (auto& var : proto_->inputs()) { - if (var.name() == reused_name) { - found = true; - break; - } - } - PADDLE_ENFORCE(found == true, - "Input/Output name: %s reused_name: %s, one of them is not " - "exists or not matched.", - name, reused_name); - - found = false; - for (int i = 0; i < proto_->outputs().size(); ++i) { - var = proto_->mutable_outputs()->Mutable(i); - if (var->name() == name) { - PADDLE_ENFORCE(!var->has_reuse(), - "Output(%s) has been set reused var of %s", name, - var->reuse()); - found = true; - var->set_reuse(reused_name); - break; - } - } - PADDLE_ENFORCE(found == true, - "Input/Output name: %s reused_name: %s, one of them is not " - "exists or not matched.", - name, reused_name); -} - void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { std::unordered_set names; auto checker = [&](const std::string& name) { @@ -91,24 +56,6 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { } } -void OpProtoAndCheckerMaker::CheckReuseVars() { - std::unordered_set names; - for (auto& input : proto_->inputs()) { - names.insert(input.name()); - } - auto checker = [&](const std::string& name, const std::string& reused) { - PADDLE_ENFORCE( - names.count(reused), - "Output [%s] reuse Input [%s], but the input is not registered.", name, - reused); - }; - for (auto& output : proto_->outputs()) { - if (output.has_reuse()) { - checker(output.name(), output.reuse()); - } - } -} - void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, OpAttrChecker* attr_checker) { proto_ = proto; diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 4ed3cc45d66849267ef4945a03da1db76b53e4ea..cd2471dc49503bb83245673ce543364f5f873995 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -14,8 +14,6 @@ limitations under the License. */ #pragma once #include -#include - #include "glog/logging.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/framework.pb.h" @@ -73,11 +71,6 @@ class OpProtoAndCheckerMaker { var_->set_dispensable(true); return *this; } - - VariableBuilder &Reuse(const std::string &name) { - var_->set_reuse(name); - return *this; - } }; VariableBuilder AddInput(const std::string &name, const std::string &comment); @@ -85,8 +78,6 @@ class OpProtoAndCheckerMaker { VariableBuilder AddOutput(const std::string &name, const std::string &comment); - void Reuse(const std::string &name, const std::string &reused_name); - template TypedAttrChecker &AddAttr(const std::string &name, const std::string &comment, @@ -105,8 +96,6 @@ class OpProtoAndCheckerMaker { void CheckNoDuplicatedInOutAttrs(); void Validate(); - void CheckReuseVars(); - proto::OpProto *proto_; OpAttrChecker *op_checker_; bool validated_{false}; diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc index b71c7b646857e11f291748c4c7c2af92b6d53231..a8030d377fdb4d4aef74b315e21792dad10fac96 100644 --- a/paddle/fluid/framework/op_proto_maker_test.cc +++ b/paddle/fluid/framework/op_proto_maker_test.cc @@ -47,120 +47,3 @@ TEST(ProtoMaker, DuplicatedInOut) { ASSERT_THROW(proto_maker(&op_proto, &op_checker), paddle::platform::EnforceNotMet); } - -class TestInplaceProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { - public: - void Make() { - AddInput("X", "input of test op"); - AddOutput("XOut", "output of test op").Reuse("X"); - } -}; - -class TestInplaceProtoMaker2 - : public paddle::framework::OpProtoAndCheckerMaker { - public: - void Make() { - AddInput("X", "input of test op"); - AddOutput("XOut", "output of test op").Reuse("X"); - AddOutput("NoOut", "output of test op").Reuse("NotExists"); - } -}; - -TEST(ProtoMaker, InplaceOutput) { - paddle::framework::proto::OpProto op_proto, op_proto2; - paddle::framework::OpAttrChecker op_checker; - TestInplaceProtoMaker proto_maker; - TestInplaceProtoMaker2 proto_maker2; - - proto_maker(&op_proto, &op_checker); - - ASSERT_THROW(proto_maker2(&op_proto2, &op_checker), - paddle::platform::EnforceNotMet); -} - -// normal reuse -class TestReuseProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { - public: - void Make() { - AddInput("X", "input of test op"); - AddInput("Y", "input of test op"); - AddOutput("Out", "output of test op"); - AddOutput("XOut", "output of test op"); - // avoid destructor exception. - // Validate(); - TestReuse(); - } - - virtual void TestReuse() {} -}; - -// test duplicate reuse error -class TestReuseProtoMaker2 : public TestReuseProtoMaker { - public: - void TestReuse() { - Reuse("Out", "X"); - Reuse("Out", "Y"); - } -}; - -// NotExists Input -class TestReuseProtoMaker3 : public TestReuseProtoMaker { - public: - void TestReuse() { - Reuse("Out", "NotExists"); - Reuse("XOut", "X"); - } -}; - -// NotExists Output -class TestReuseProtoMaker4 : public TestReuseProtoMaker { - public: - void TestReuse() { Reuse("NotExists", "X"); } -}; - -TEST(ProtoMaker, Reuse) { - paddle::framework::proto::OpProto op_proto; - paddle::framework::OpAttrChecker op_checker; - TestReuseProtoMaker proto_maker; - proto_maker(&op_proto, &op_checker); -} - -// NOTE(dzhwinter): -// There is a Fatal CHECK on base class destructor, which will call abort inside -// instead of -// throw an exception. If we throw an exception in Make(), we will trigger the -// CHECK and terminate the tests. -// -// I had tried to replace the default CHECK with a exception, however, it's -// still not supported by glog. -// the details: -// https://github.com/google/glog/issues/249 -// https://github.com/facebookresearch/TensorComprehensions/issues/351 -/* -TEST(ProtoMaker, ReuseWithException) { - paddle::framework::proto::OpProto op_proto2, op_proto3, op_proto4; - paddle::framework::OpAttrChecker op_checker; - TestReuseProtoMaker2 proto_maker2; - TestReuseProtoMaker3 proto_maker3; - TestReuseProtoMaker4 proto_maker4; - EXPECT_THROW(proto_maker2(&op_proto2, &op_checker), - paddle::platform::EnforceNotMet); - - EXPECT_THROW(proto_maker3(&op_proto3, &op_checker), - paddle::platform::EnforceNotMet); - - EXPECT_THROW(proto_maker4(&op_proto4, &op_checker), - paddle::platform::EnforceNotMet); -} - -void FailureFunction() { - throw std::runtime_error("Check failed in destructor."); - // return 0; -} - -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - google::InstallFailureFunction(&FailureFunction); - return RUN_ALL_TESTS(); -} -*/ diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 093108cb54779eb0cf35dd83e63eb0b1abb66dcd..3368ae2ee4cf65b85abf1fcd89dee14f43522e1f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -156,12 +156,10 @@ ParallelExecutor::ParallelExecutor( params, member_->local_scopes_, member_->use_cuda_); #endif - if (VLOG_IS_ON(5)) { - // If the loss_var_name is given, the number of graph should be only one. - if (loss_var_name.size()) { - PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1, - "The number of graph should be only one"); - } + // If the loss_var_name is given, the number of graph should be only one. + if (loss_var_name.size()) { + PADDLE_ENFORCE_EQ(ir::GraphNum(*graph), 1, + "The number of graph should be only one"); } if (exec_strategy.type_ == ExecutionStrategy::kDefault) { diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index 67994aad70a40c0e0c8a311914d4ea40b96eaf1e..6e682b69583e00ab1bbe1c0d22e21ae114a61a76 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -21,7 +21,7 @@ else fi USE_TENSORRT=OFF -if [ [-d"$TENSORRT_INCLUDE_DIR"] -a [-d"$TENSORRT_LIB_DIR"] ]; then +if [ -d "$TENSORRT_INCLUDE_DIR" -a -d "$TENSORRT_LIB_DIR" ]; then USE_TENSORRT=ON fi diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index bbf52bea1358c32596ab6f14eeaa419735d19fc6..9ddb3a5d29f973047507855b43b226913a3600b5 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -28,7 +28,7 @@ using paddle::framework::Tensor; public: \ void Make() override { \ AddInput("X", "Input of " #OP_NAME " operator"); \ - AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ + AddOutput("Out", "Output of " #OP_NAME " operator"); \ AddAttr("use_mkldnn", \ "(bool, default false) Only used in mkldnn kernel") \ .SetDefault(false); \ diff --git a/paddle/fluid/operators/adam_op.cc b/paddle/fluid/operators/adam_op.cc index 5d670fe3b9d99a31a628ff707ff860564eca952e..f3717af630017eba18aa265f3dbb496e18280a57 100644 --- a/paddle/fluid/operators/adam_op.cc +++ b/paddle/fluid/operators/adam_op.cc @@ -92,9 +92,9 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator"); AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator"); - AddOutput("ParamOut", "(Tensor) Output parameter").Reuse("Param"); - AddOutput("Moment1Out", "(Tensor) Output first moment").Reuse("Moment1"); - AddOutput("Moment2Out", "(Tensor) Output second moment").Reuse("Moment2"); + AddOutput("ParamOut", "(Tensor) Output parameter"); + AddOutput("Moment1Out", "(Tensor) Output first moment"); + AddOutput("Moment2Out", "(Tensor) Output second moment"); AddAttr("beta1", "(float, default 0.9) " diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 5912a1a17cbd29c3ebd83f37133c044f0905c8bd..3eb473832577bd348b33ba9b0be9e597b78f26bc 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -135,15 +135,13 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Variance", "The global variance (for training) " "or estimated Variance (for testing)"); - AddOutput("Y", "result after normalization").Reuse("X"); + AddOutput("Y", "result after normalization"); AddOutput("MeanOut", "Share memory with Mean. " - "Store the global mean when training") - .Reuse("Mean"); + "Store the global mean when training"); AddOutput("VarianceOut", "Share memory with Variance. " - "Store the global Variance when training") - .Reuse("Variance"); + "Store the global Variance when training"); AddOutput("SavedMean", "Mean of the current mini batch, " "will apply to output when training") diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 8f2561fcc389922f05093055cba4b43dbd4e4536..2cd9979bd3426a15af34a49002d5db2fdd9aeec7 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -130,8 +130,7 @@ void Conv2DOpMaker::Make() { .AsDispensable(); AddOutput("Output", "(Tensor) The output tensor of convolution operator. " - "The format of output tensor is also NCHW.") - .Reuse("Input"); + "The format of output tensor is also NCHW."); AddInput("ResidualData", "(Tensor) Tensor with residual data " "to which convolution output will be added." @@ -238,8 +237,7 @@ void Conv3DOpMaker::Make() { "input image channels divided by the groups."); AddOutput("Output", "(Tensor) The output tensor of convolution operator." - "The format of output tensor is also NCDHW.") - .Reuse("Input"); + "The format of output tensor is also NCDHW."); AddAttr>("strides", "(vector, default:{1, 1, 1}), the " "strides(d_stride, h_stride, w_stride) of " diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index dda423efd35b96f5e1d7c55389818f46ef3d8694..46fff9d338b7759496faaf6dd9960d34887755ba 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -52,6 +52,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { PADDLE_ENFORCE( ctx->HasOutput("TargetBBox"), "Output(TargetBBox) of RpnTargetAssignOp should not be null"); + PADDLE_ENFORCE( + ctx->HasOutput("BBoxInsideWeight"), + "Output(BBoxInsideWeight) of RpnTargetAssignOp should not be null"); auto anchor_dims = ctx->GetInputDim("Anchor"); auto gt_boxes_dims = ctx->GetInputDim("GtBoxes"); @@ -68,6 +71,7 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel { ctx->SetOutputDim("ScoreIndex", {-1}); ctx->SetOutputDim("TargetLabel", {-1, 1}); ctx->SetOutputDim("TargetBBox", {-1, 4}); + ctx->SetOutputDim("BBoxInsideWeight", {-1, 4}); } protected: @@ -169,6 +173,7 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, const float rpn_positive_overlap, const float rpn_negative_overlap, std::vector* fg_inds, std::vector* bg_inds, std::vector* tgt_lbl, + std::vector* fg_fake, std::vector* bbox_inside_weight, std::minstd_rand engine, bool use_random) { float epsilon = 0.00001; int anchor_num = anchor_to_gt_max.dims()[0]; @@ -201,12 +206,12 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, // Reservoir Sampling int fg_num = static_cast(rpn_fg_fraction * rpn_batch_size_per_im); ReservoirSampling(fg_num, &fg_inds_fake, engine, use_random); - fg_num = static_cast(fg_inds_fake.size()); - for (int64_t i = 0; i < fg_num; ++i) { + int fg_fake_num = static_cast(fg_inds_fake.size()); + for (int64_t i = 0; i < fg_fake_num; ++i) { target_label[fg_inds_fake[i]] = 1; } - int bg_num = rpn_batch_size_per_im - fg_num; + int bg_num = rpn_batch_size_per_im - fg_fake_num; for (int64_t i = 0; i < anchor_num; ++i) { if (anchor_to_gt_max_data[i] < rpn_negative_overlap) { bg_inds_fake.push_back(i); @@ -214,12 +219,28 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, } ReservoirSampling(bg_num, &bg_inds_fake, engine, use_random); bg_num = static_cast(bg_inds_fake.size()); + int fake_num = 0; for (int64_t i = 0; i < bg_num; ++i) { + // fg fake found + if (target_label[bg_inds_fake[i]] == 1) { + fake_num++; + fg_fake->emplace_back(fg_inds_fake[0]); + for (int j = 0; j < 4; ++j) { + bbox_inside_weight->emplace_back(T(0.)); + } + } target_label[bg_inds_fake[i]] = 0; } + for (int64_t i = 0; i < (fg_fake_num - fake_num) * 4; ++i) { + bbox_inside_weight->emplace_back(T(1.)); + } + for (int64_t i = 0; i < anchor_num; ++i) { - if (target_label[i] == 1) fg_inds->emplace_back(i); + if (target_label[i] == 1) { + fg_inds->emplace_back(i); + fg_fake->emplace_back(i); + } if (target_label[i] == 0) bg_inds->emplace_back(i); } fg_num = fg_inds->size(); @@ -248,7 +269,8 @@ std::vector SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx, std::vector bg_inds; std::vector gt_inds; std::vector tgt_lbl; - + std::vector fg_fake; + std::vector bbox_inside_weight; // Calculate the max IoU between anchors and gt boxes // Map from anchor to gt box that has highest overlap auto place = ctx.GetPlace(); @@ -275,32 +297,37 @@ std::vector SampleRpnFgBgGt(const platform::CPUDeviceContext& ctx, // Follow the Faster RCNN's implementation ScoreAssign(anchor_by_gt_overlap_data, anchor_to_gt_max, gt_to_anchor_max, rpn_batch_size_per_im, rpn_fg_fraction, rpn_positive_overlap, - rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, engine, - use_random); + rpn_negative_overlap, &fg_inds, &bg_inds, &tgt_lbl, &fg_fake, + &bbox_inside_weight, engine, use_random); int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); - gt_inds.reserve(fg_num); - for (int i = 0; i < fg_num; ++i) { - gt_inds.emplace_back(argmax[fg_inds[i]]); + int fg_fake_num = fg_fake.size(); + gt_inds.reserve(fg_fake_num); + for (int i = 0; i < fg_fake_num; ++i) { + gt_inds.emplace_back(argmax[fg_fake[i]]); } - - Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t; - int* loc_index_data = loc_index_t.mutable_data({fg_num}, place); + Tensor loc_index_t, score_index_t, tgt_lbl_t, gt_inds_t, bbox_inside_weight_t; + int* loc_index_data = loc_index_t.mutable_data({fg_fake_num}, place); int* score_index_data = score_index_t.mutable_data({fg_num + bg_num}, place); int* tgt_lbl_data = tgt_lbl_t.mutable_data({fg_num + bg_num}, place); - int* gt_inds_data = gt_inds_t.mutable_data({fg_num}, place); - std::copy(fg_inds.begin(), fg_inds.end(), loc_index_data); + int* gt_inds_data = gt_inds_t.mutable_data({fg_fake_num}, place); + T* bbox_inside_weight_data = + bbox_inside_weight_t.mutable_data({fg_fake_num, 4}, place); + std::copy(fg_fake.begin(), fg_fake.end(), loc_index_data); std::copy(fg_inds.begin(), fg_inds.end(), score_index_data); std::copy(bg_inds.begin(), bg_inds.end(), score_index_data + fg_num); std::copy(tgt_lbl.begin(), tgt_lbl.end(), tgt_lbl_data); std::copy(gt_inds.begin(), gt_inds.end(), gt_inds_data); + std::copy(bbox_inside_weight.begin(), bbox_inside_weight.end(), + bbox_inside_weight_data); std::vector loc_score_tgtlbl_gt; loc_score_tgtlbl_gt.emplace_back(loc_index_t); loc_score_tgtlbl_gt.emplace_back(score_index_t); loc_score_tgtlbl_gt.emplace_back(tgt_lbl_t); loc_score_tgtlbl_gt.emplace_back(gt_inds_t); + loc_score_tgtlbl_gt.emplace_back(bbox_inside_weight_t); return loc_score_tgtlbl_gt; } @@ -318,6 +345,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { auto* score_index = context.Output("ScoreIndex"); auto* tgt_bbox = context.Output("TargetBBox"); auto* tgt_lbl = context.Output("TargetLabel"); + auto* bbox_inside_weight = context.Output("BBoxInsideWeight"); PADDLE_ENFORCE_EQ(gt_boxes->lod().size(), 1UL, "RpnTargetAssignOp gt_boxes needs 1 level of LoD"); @@ -340,7 +368,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { score_index->mutable_data({max_num}, place); tgt_bbox->mutable_data({max_num, 4}, place); tgt_lbl->mutable_data({max_num, 1}, place); - + bbox_inside_weight->mutable_data({max_num, 4}, place); auto& dev_ctx = context.device_context(); std::random_device rnd; @@ -394,6 +422,7 @@ class RpnTargetAssignKernel : public framework::OpKernel { Tensor sampled_score_index = loc_score_tgtlbl_gt[1]; Tensor sampled_tgtlbl = loc_score_tgtlbl_gt[2]; Tensor sampled_gt_index = loc_score_tgtlbl_gt[3]; + Tensor sampled_bbox_inside_weight = loc_score_tgtlbl_gt[4]; int loc_num = sampled_loc_index.dims()[0]; int score_num = sampled_score_index.dims()[0]; @@ -432,6 +461,8 @@ class RpnTargetAssignKernel : public framework::OpKernel { AppendRpns(score_index, total_score_num, &sampled_score_index_unmap); AppendRpns(tgt_bbox, total_loc_num * 4, &sampled_tgt_bbox); AppendRpns(tgt_lbl, total_score_num, &sampled_tgtlbl); + AppendRpns(bbox_inside_weight, total_loc_num * 4, + &sampled_bbox_inside_weight); total_loc_num += loc_num; total_score_num += score_num; @@ -448,10 +479,12 @@ class RpnTargetAssignKernel : public framework::OpKernel { score_index->set_lod(loc_score); tgt_bbox->set_lod(lod_loc); tgt_lbl->set_lod(loc_score); + bbox_inside_weight->set_lod(lod_loc); loc_index->Resize({total_loc_num}); score_index->Resize({total_score_num}); tgt_bbox->Resize({total_loc_num, 4}); tgt_lbl->Resize({total_score_num, 1}); + bbox_inside_weight->Resize({total_loc_num, 4}); } }; @@ -514,6 +547,9 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { "TargetLabel", "(Tensor), The target labels of each anchor with shape " "[F + B, 1], F and B are sampled foreground and backgroud number."); + AddOutput("BBoxInsideWeight", + "(Tensor), The bbox inside weight with shape " + "[F, 4], F is the sampled foreground number."); AddComment(R"DOC( This operator can be, for a given set of ground truth bboxes and the anchors, to assign classification and regression targets to each prediction. diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 7e5975ead64ab39a9c618a33e300c4fce55a5b22..68c6e315cc3b5fa932f8946f6d4f838f4d3fc5a5 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -80,8 +80,6 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { void Make() final { AddInput("X", "(Tensor), The first input tensor of elementwise op."); AddInput("Y", "(Tensor), The second input tensor of elementwise op."); - // AddOutput("SavedShape", "(Tensor), save X, Y shape for grad to save - // memory.").AsIntermediate(); AddOutput("Out", "The output of elementwise op."); AddAttr("axis", "(int, default -1). The start dimension index " @@ -129,13 +127,11 @@ But the output only shares the LoD information with the input $X$. )DOC", GetName(), GetEquation())); - SetReuse(); } protected: virtual std::string GetName() const = 0; virtual std::string GetEquation() const = 0; - virtual void SetReuse() {} }; class ElementwiseOpGrad : public framework::OperatorWithKernel { @@ -269,7 +265,6 @@ class ElemwiseGradKernel : public framework::OpKernel { protected: \ virtual std::string GetName() const { return op_name; } \ virtual std::string GetEquation() const { return equation; } \ - virtual void SetReuse() { Reuse(__VA_ARGS__); } \ }; \ REGISTER_OPERATOR(op_type, ::paddle::operators::ElementwiseOp, \ __ElemwiseOp##op_type##Maker__, \ diff --git a/paddle/fluid/operators/mean_op.cc b/paddle/fluid/operators/mean_op.cc index 9e0bebd17c02a3ce010b77142757b8789cfbcdd9..19426b3c204095bd415cebcd87cff18468acd564 100644 --- a/paddle/fluid/operators/mean_op.cc +++ b/paddle/fluid/operators/mean_op.cc @@ -34,7 +34,7 @@ class MeanOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", "(Tensor) The input of mean op"); - AddOutput("Out", "(Tensor) The output of mean op").Reuse("X"); + AddOutput("Out", "(Tensor) The output of mean op"); AddComment(R"DOC( Mean Operator calculates the mean of all elements in X. diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index f8ad63690e84339da0390d4ddd2db45f25db385a..24a5346b031008531fcefff0e6f1c31da33d1c3b 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -151,8 +151,7 @@ void Pool2dOpMaker::Make() { "The format of output tensor is also NCHW, " "where N is batch size, C is the number of channels, " "H is the height of the feature, " - "and W is the width of the feature.") - .Reuse("X"); + "and W is the width of the feature."); AddAttr("pooling_type", "(string), pooling type, can be \"max\" for max-pooling " @@ -252,8 +251,7 @@ void Pool3dOpMaker::Make() { "The format of output tensor is also NCDHW, " "where N is batch size, C is " "the number of channels, and D, H and W is the depth, height and " - "width of the feature, respectively.") - .Reuse("X"); + "width of the feature, respectively."); AddAttr("pooling_type", "(string) Pooling type, can be \"max\" for max-pooling " diff --git a/paddle/fluid/operators/sgd_op.cc b/paddle/fluid/operators/sgd_op.cc index 411a126bc8e2b3a8d25f436489c13970568ccae4..ea62acd08c5009556abf05c91726111870d1a462 100644 --- a/paddle/fluid/operators/sgd_op.cc +++ b/paddle/fluid/operators/sgd_op.cc @@ -77,8 +77,7 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Grad", "(Tensor or SelectedRows) Input gradient"); AddOutput("ParamOut", "(Tensor or SelectedRows, same with Param) " - "Output parameter, should share the same memory with Param") - .Reuse("Param"); + "Output parameter, should share the same memory with Param"); AddComment(R"DOC( SGD operator diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index bb081238820b9ee3ae095442d21cfce11f7b41e5..a4bdbe6648afa7c91a056af4737bb5d826229022 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -80,8 +80,7 @@ class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "The input tensor of softmax, " "whose last dimension is the input_feature_dimensions."); - AddOutput("Out", "The normalized values with the same shape as X.") - .Reuse("X"); + AddOutput("Out", "The normalized values with the same shape as X."); AddAttr( "use_cudnn", "(bool, default false) Only used in cudnn kernel, need install cudnn") diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index fe7c7039c7dec714e265ede1b7167fd800ddc2f7..34dbac2ab8dcc9bd2b91e2daa2f42806057f5f56 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -132,7 +132,7 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(vector) The input tensors of sum operator.") .AsDuplicable(); - AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X"); + AddOutput("Out", "(Tensor) The output tensor of sum operator."); AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") .SetDefault(false); diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 4a8ac441cfaf642fde58ee30865a22e83c065498..c17d1afc309c65035063348d4934ea1783b018ed 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -50,7 +50,7 @@ class TopkOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", "(Tensor) The input of Topk op"); - AddOutput("Out", "(Tensor) The output tensor of Topk op").Reuse("X"); + AddOutput("Out", "(Tensor) The output tensor of Topk op"); AddOutput("Indices", "(Tensor) The indices of Topk elements of input"); AddComment(R"DOC( Top K operator diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 8e4a07556fb51dbb15ef948fcee120e2f68e089a..0cad224ca8860b0e4bc2e3f2bc1659235aadfe2d 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -262,31 +262,31 @@ __global__ void KeMatrixTopK(T* output, int output_stride, int64_t* indices, const T* src, int lds, int dim, int k, int grid_dim, int num) { __shared__ Pair sh_topk[BlockSize]; - __shared__ int maxid[BlockSize / 2]; const int tid = threadIdx.x; const int warp = threadIdx.x / 32; const int bid = blockIdx.x; for (int i = bid; i < num; i += grid_dim) { - output += i * output_stride; - indices += i * k; - + int top_num = k; + __shared__ int maxid[BlockSize / 2]; + T* out = output + i * output_stride; + int64_t* inds = indices + i * k; Pair topk[MaxLength]; int beam = MaxLength; Pair max; bool is_empty = false; bool firststep = true; - for (int k = 0; k < MaxLength; k++) { - topk[k].set(-INFINITY, -1); + for (int j = 0; j < MaxLength; j++) { + topk[j].set(-INFINITY, -1); } - while (k) { + while (top_num) { ThreadGetTopK( topk, &beam, k, src + i * lds, &firststep, &is_empty, &max, dim, tid); sh_topk[tid] = topk[0]; - BlockReduce(sh_topk, maxid, topk, &output, - &indices, &beam, &k, tid, warp); + BlockReduce(sh_topk, maxid, topk, &out, &inds, + &beam, &top_num, tid, warp); } } } @@ -327,13 +327,15 @@ class TopkOpCUDAKernel : public framework::OpKernel { size_t k = static_cast(ctx.Attr("k")); const T* input_data = input->data(); - T* output_data = output->mutable_data(ctx.GetPlace()); // FIXME(typhoonzero): data is always converted to type T? int64_t* indices_data = indices->mutable_data(ctx.GetPlace()); - size_t input_height = input->dims()[0]; - size_t input_width = input->dims()[1]; + framework::DDim inputdims = input->dims(); + const size_t input_height = framework::product( + framework::slice_ddim(inputdims, 0, inputdims.size() - 1)); + const size_t input_width = inputdims[inputdims.size() - 1]; + if (k > input_width) k = input_width; // NOTE: pass lds and dim same to input width. @@ -342,14 +344,12 @@ class TopkOpCUDAKernel : public framework::OpKernel { const int kMaxHeight = 2048; int gridx = input_height < kMaxHeight ? input_height : kMaxHeight; auto& dev_ctx = ctx.cuda_device_context(); - switch (GetDesiredBlockDim(input_width)) { FIXED_BLOCK_DIM( KeMatrixTopK<<>>( - output_data, output->dims()[1], indices_data, input_data, - input_width, input_width, static_cast(k), gridx, - input_height)); + output_data, k, indices_data, input_data, input_width, + input_width, static_cast(k), gridx, input_height)); default: PADDLE_THROW("Error"); } diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index 054dd481994d03f71b0ed5dc73e103085f6c91aa..76ece57b39919148da04caecaa43ea9d2b9d95df 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -34,7 +34,6 @@ class TopkKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // Get the top k elements of each row of input tensor - // FIXME: only deal with matrix(2d tensor). auto* input = ctx.Input("X"); auto* output = ctx.Output("Out"); auto* indices = ctx.Output("Indices"); @@ -44,8 +43,6 @@ class TopkKernel : public framework::OpKernel { T* output_data = output->mutable_data(ctx.GetPlace()); int64_t* indices_data = indices->mutable_data(ctx.GetPlace()); - auto eg_input = EigenMatrix::From(*input); - // reshape input to a flattern matrix(like flat_inner_dims) framework::DDim inputdims = input->dims(); const size_t row = framework::product( @@ -53,7 +50,7 @@ class TopkKernel : public framework::OpKernel { const size_t col = inputdims[inputdims.size() - 1]; Eigen::DSizes flat2dims(row, col); // NOTE: eigen shape doesn't affect paddle tensor. - eg_input.reshape(flat2dims); + auto eg_input = EigenMatrix::Reshape(*input, inputdims.size() - 1); #ifdef PADDLE_WITH_MKLML #pragma omp parallel for diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index b94b59631a3d8999f569acf1027c71d3019f5c56..ece22d0b7ed4cac6618c7be14939c770bcf1176d 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -116,8 +116,8 @@ def rpn_target_assign(bbox_pred, Returns: tuple: A tuple(predicted_scores, predicted_location, target_label, - target_bbox) is returned. The predicted_scores and - predicted_location is the predicted result of the RPN. + target_bbox, bbox_inside_weight) is returned. The predicted_scores + and predicted_location is the predicted result of the RPN. The target_label and target_bbox is the ground truth, respectively. The predicted_location is a 2D Tensor with shape [F, 4], and the shape of target_bbox is same as the shape of @@ -126,6 +126,8 @@ def rpn_target_assign(bbox_pred, [F + B, 1], and the shape of target_label is same as the shape of the predicted_scores, B is the number of the background anchors, the F and B is depends on the input of this operator. + Bbox_inside_weight represents whether the predicted loc is fake_fg + or not and the shape is [F, 4]. Examples: .. code-block:: python @@ -138,7 +140,7 @@ def rpn_target_assign(bbox_pred, append_batch_size=False, dtype='float32') gt_boxes = layers.data(name='gt_boxes', shape=[10, 4], append_batch_size=False, dtype='float32') - loc_pred, score_pred, loc_target, score_target = + loc_pred, score_pred, loc_target, score_target, bbox_inside_weight = fluid.layers.rpn_target_assign(bbox_pred=bbox_pred, cls_logits=cls_logits, anchor_box=anchor_box, @@ -152,6 +154,8 @@ def rpn_target_assign(bbox_pred, target_label = helper.create_variable_for_type_inference(dtype='int32') target_bbox = helper.create_variable_for_type_inference( dtype=anchor_box.dtype) + bbox_inside_weight = helper.create_variable_for_type_inference( + dtype=anchor_box.dtype) helper.append_op( type="rpn_target_assign", inputs={ @@ -164,7 +168,8 @@ def rpn_target_assign(bbox_pred, 'LocationIndex': loc_index, 'ScoreIndex': score_index, 'TargetLabel': target_label, - 'TargetBBox': target_bbox + 'TargetBBox': target_bbox, + 'BBoxInsideWeight': bbox_inside_weight }, attrs={ 'rpn_batch_size_per_im': rpn_batch_size_per_im, @@ -179,13 +184,14 @@ def rpn_target_assign(bbox_pred, score_index.stop_gradient = True target_label.stop_gradient = True target_bbox.stop_gradient = True + bbox_inside_weight.stop_gradient = True cls_logits = nn.reshape(x=cls_logits, shape=(-1, 1)) bbox_pred = nn.reshape(x=bbox_pred, shape=(-1, 4)) predicted_cls_logits = nn.gather(cls_logits, score_index) predicted_bbox_pred = nn.gather(bbox_pred, loc_index) - return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox + return predicted_cls_logits, predicted_bbox_pred, target_label, target_bbox, bbox_inside_weight def detection_output(loc, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 56129641ce5900d82aedf243d2fa1eadfd6b8d86..28dc7519571d8b5464e92fddf634ba46691ceaa9 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -301,7 +301,7 @@ class TestRpnTargetAssign(unittest.TestCase): dtype='float32', lod_level=1, append_batch_size=False) - pred_scores, pred_loc, tgt_lbl, tgt_bbox = layers.rpn_target_assign( + pred_scores, pred_loc, tgt_lbl, tgt_bbox, bbox_inside_weight = layers.rpn_target_assign( bbox_pred=bbox_pred, cls_logits=cls_logits, anchor_box=anchor_box, @@ -313,15 +313,18 @@ class TestRpnTargetAssign(unittest.TestCase): rpn_straddle_thresh=0.0, rpn_fg_fraction=0.5, rpn_positive_overlap=0.7, - rpn_negative_overlap=0.3) + rpn_negative_overlap=0.3, + use_random=False) self.assertIsNotNone(pred_scores) self.assertIsNotNone(pred_loc) self.assertIsNotNone(tgt_lbl) self.assertIsNotNone(tgt_bbox) + self.assertIsNotNone(bbox_inside_weight) assert pred_scores.shape[1] == 1 assert pred_loc.shape[1] == 4 assert pred_loc.shape[1] == tgt_bbox.shape[1] + print(str(program)) class TestGenerateProposals(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py index f63dbcd3d7f6bfce3ccc1c42ae41afe42bfad003..1a2c9bb5f43d55d8e6183de0d55bfcc2b9ac3f08 100644 --- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py @@ -50,8 +50,10 @@ def rpn_target_assign(anchor_by_gt_overlap, fg_inds, size=(len(fg_inds) - num_fg), replace=False) else: disable_inds = fg_inds[num_fg:] + labels[disable_inds] = -1 fg_inds = np.where(labels == 1)[0] + bbox_inside_weight = np.zeros((len(fg_inds), 4), dtype=np.float32) num_bg = rpn_batch_size_per_im - np.sum(labels == 1) bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0] @@ -59,18 +61,27 @@ def rpn_target_assign(anchor_by_gt_overlap, enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)] else: enable_inds = bg_inds[:num_bg] + + fg_fake_inds = np.array([], np.int32) + fg_value = np.array([fg_inds[0]], np.int32) + fake_num = 0 + for bg_id in enable_inds: + if bg_id in fg_inds: + fake_num += 1 + fg_fake_inds = np.hstack([fg_fake_inds, fg_value]) labels[enable_inds] = 0 + + bbox_inside_weight[fake_num:, :] = 1 fg_inds = np.where(labels == 1)[0] bg_inds = np.where(labels == 0)[0] - - loc_index = fg_inds - score_index = np.hstack((fg_inds, bg_inds)) + loc_index = np.hstack([fg_fake_inds, fg_inds]) + score_index = np.hstack([fg_inds, bg_inds]) labels = labels[score_index] assert not np.any(labels == -1), "Wrong labels with -1" - gt_inds = anchor_to_gt_argmax[fg_inds] + gt_inds = anchor_to_gt_argmax[loc_index] - return loc_index, score_index, labels, gt_inds + return loc_index, score_index, labels, gt_inds, bbox_inside_weight def get_anchor(n, c, h, w): @@ -123,9 +134,12 @@ def rpn_target_assign_in_python(all_anchors, gt_boxes_slice = gt_boxes_slice[not_crowd_inds] iou = _bbox_overlaps(inside_anchors, gt_boxes_slice) - loc_inds, score_inds, labels, gt_inds = rpn_target_assign( - iou, rpn_batch_size_per_im, rpn_positive_overlap, - rpn_negative_overlap, rpn_fg_fraction, use_random) + loc_inds, score_inds, labels, gt_inds, bbox_inside_weight = \ + rpn_target_assign(iou, rpn_batch_size_per_im, + rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, + use_random) # unmap to all anchor loc_inds = inds_inside[loc_inds] score_inds = inds_inside[score_inds] @@ -139,6 +153,7 @@ def rpn_target_assign_in_python(all_anchors, score_indexes = score_inds tgt_labels = labels tgt_bboxes = box_deltas + bbox_inside_weights = bbox_inside_weight else: loc_indexes = np.concatenate( [loc_indexes, loc_inds + i * anchor_num]) @@ -146,8 +161,10 @@ def rpn_target_assign_in_python(all_anchors, [score_indexes, score_inds + i * anchor_num]) tgt_labels = np.concatenate([tgt_labels, labels]) tgt_bboxes = np.vstack([tgt_bboxes, box_deltas]) + bbox_inside_weights = np.vstack([bbox_inside_weights, \ + bbox_inside_weight]) - return loc_indexes, score_indexes, tgt_bboxes, tgt_labels + return loc_indexes, score_indexes, tgt_bboxes, tgt_labels, bbox_inside_weights class TestRpnTargetAssignOp(OpTest): @@ -182,10 +199,12 @@ class TestRpnTargetAssignOp(OpTest): rpn_fg_fraction = 0.5 use_random = False - loc_index, score_index, tgt_bbox, labels = rpn_target_assign_in_python( - all_anchors, gt_boxes, is_crowd, im_info, lod, rpn_straddle_thresh, - rpn_batch_size_per_im, rpn_positive_overlap, rpn_negative_overlap, - rpn_fg_fraction, use_random) + loc_index, score_index, tgt_bbox, labels, bbox_inside_weights = \ + rpn_target_assign_in_python(all_anchors, gt_boxes, is_crowd, + im_info, lod, rpn_straddle_thresh, + rpn_batch_size_per_im, rpn_positive_overlap, + rpn_negative_overlap, + rpn_fg_fraction, use_random) labels = labels[:, np.newaxis] self.op_type = "rpn_target_assign" @@ -207,7 +226,8 @@ class TestRpnTargetAssignOp(OpTest): 'LocationIndex': loc_index.astype('int32'), 'ScoreIndex': score_index.astype('int32'), 'TargetBBox': tgt_bbox.astype('float32'), - 'TargetLabel': labels.astype('int32') + 'TargetLabel': labels.astype('int32'), + 'BBoxInsideWeight': bbox_inside_weights.astype('float32') } def test_check_output(self): diff --git a/python/paddle/fluid/tests/unittests/test_top_k_op.py b/python/paddle/fluid/tests/unittests/test_top_k_op.py index e54e170f7f1e03db4b63db72edb7395d18130f68..69b29db83a43d18c0825b610642009a0377b9901 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_op.py +++ b/python/paddle/fluid/tests/unittests/test_top_k_op.py @@ -21,22 +21,27 @@ from op_test import OpTest class TestTopkOp(OpTest): def setUp(self): + self.set_args() self.op_type = "top_k" - k = 1 - input = np.random.random((32, 84)).astype("float32") - output = np.ndarray((32, k)) - indices = np.ndarray((32, k)).astype("int64") + k = self.top_k + input = np.random.random((self.row, k)).astype("float32") + output = np.ndarray((self.row, k)) + indices = np.ndarray((self.row, k)).astype("int64") self.inputs = {'X': input} self.attrs = {'k': k} - for rowid in range(32): + for rowid in range(self.row): row = input[rowid] - output[rowid] = np.sort(row)[-k:] - indices[rowid] = row.argsort()[-k:] + output[rowid] = np.sort(row)[::-1][:k] + indices[rowid] = row.argsort()[::-1][:k] self.outputs = {'Out': output, 'Indices': indices} + def set_args(self): + self.row = 32 + self.top_k = 1 + def test_check_output(self): self.check_output() @@ -50,14 +55,39 @@ class TestTopkOp3d(OpTest): output = np.ndarray((64, k)) indices = np.ndarray((64, k)).astype("int64") - # FIXME: should use 'X': input for a 3d input - self.inputs = {'X': input_flat_2d} + self.inputs = {'X': input} self.attrs = {'k': k} for rowid in range(64): row = input_flat_2d[rowid] - output[rowid] = np.sort(row)[-k:] - indices[rowid] = row.argsort()[-k:] + output[rowid] = np.sort(row)[::-1][:k] + indices[rowid] = row.argsort()[::-1][:k] + + self.outputs = { + 'Out': output.reshape((32, 2, k)), + 'Indices': indices.reshape((32, 2, k)) + } + + def test_check_output(self): + self.check_output() + + +class TestTopkOp2(OpTest): + def setUp(self): + self.op_type = "top_k" + k = 1 + m = 2056 + input = np.random.random((m, 84)).astype("float32") + output = np.ndarray((m, k)) + indices = np.ndarray((m, k)).astype("int64") + + self.inputs = {'X': input} + self.attrs = {'k': k} + + for rowid in range(m): + row = input[rowid] + output[rowid] = -np.sort(-row)[:k] + indices[rowid] = (-row).argsort()[:k] self.outputs = {'Out': output, 'Indices': indices} @@ -65,5 +95,17 @@ class TestTopkOp3d(OpTest): self.check_output() +class TestTopkOp3(TestTopkOp): + def set_args(self): + self.row = 2056 + self.top_k = 3 + + +class TestTopkOp4(TestTopkOp): + def set_args(self): + self.row = 40000 + self.top_k = 1 + + if __name__ == "__main__": unittest.main()