diff --git a/AUTHORS.md b/AUTHORS.md index 4db4a4a8e7441b07ce2db4adff043bb99a09014b..389a7ad8bcfbb1d294bdd1f16905a7a0d2fbb932 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,7 +2,7 @@ |---|---| | backyes | Yan-Fei Wang | | beckett1124 | Bin Qi | -| Canpio | Jia-Yi Feng | +| JiayiFeng | Jia-Yi Feng | | chengxiaohua1105 | Xiao-Hua Cheng | | cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang | | cxysteven | Xing-Yi Cheng | diff --git a/doc/design/switch_kernel.md b/doc/design/kernel_selection.md similarity index 100% rename from doc/design/switch_kernel.md rename to doc/design/kernel_selection.md diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 41b35b5b233abd737db07aaeb6c6dd4bf6d42b08..d134aad794b0a630dfc8395c484d111bc5462d9b 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -82,7 +82,7 @@ language = 'zh_CN' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build', '**/*_en*', '*_en*'] +exclude_patterns = ['_build', '**/*_en*', '*_en*', 'api/*'] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index 5822c2481dd61da2084b0de76f6f65aa4e32e033..1f057d2e839896722fedba6634607bcdf2fd893a 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -82,7 +82,7 @@ language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build', '**/*_cn*', '*_cn*'] +exclude_patterns = ['_build', '**/*_cn*', '*_cn*', 'api/*'] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index c7deba2ab475d3c4f2c95327af77af7031b591fd..a7b249d43bf3ad9924749d5e66618750f19d8bf7 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -11,7 +11,6 @@ if(MOBILE_INFERENCE) else() add_subdirectory(pserver) add_subdirectory(trainer) - add_subdirectory(string) add_subdirectory(scripts) if(WITH_C_API) diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index a6b4191518c45d0579f800ecb901dcd9667e17d5..7405ef17d3e01e4837412c96385e13a9e605a75b 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(framework) add_subdirectory(operators) add_subdirectory(pybind) add_subdirectory(inference) +add_subdirectory(string) diff --git a/paddle/fluid/framework/ddim.cc b/paddle/fluid/framework/ddim.cc index f063ee2e6dd81e66b7b74aa23e9967d865c4d297..98f1fa9d209eb23627949a4ccb235566652076a5 100644 --- a/paddle/fluid/framework/ddim.cc +++ b/paddle/fluid/framework/ddim.cc @@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) { } return framework::make_ddim(strides); } + +DDim stride_numel(const framework::DDim& ddim) { + std::vector strides(ddim.size()); + strides[ddim.size() - 1] = ddim[ddim.size() - 1]; + for (int i = ddim.size() - 2; i >= 0; --i) { + strides[i] = strides[i + 1] * ddim[i]; + } + return framework::make_ddim(strides); +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ddim.h b/paddle/fluid/framework/ddim.h index 750ab787abb72fa3f2984caf58354e327750aa3d..405d9af7028d74e52ed015a269770c9b4ae23a5f 100644 --- a/paddle/fluid/framework/ddim.h +++ b/paddle/fluid/framework/ddim.h @@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims); DDim flatten_to_1d(const DDim& 
src); DDim stride(const DDim& ddim); + +DDim stride_numel(const DDim& ddim); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/init.cc b/paddle/fluid/framework/init.cc index cb2d740d8609210064a06cccc5d45c84275e9709..ad806a8cd791bc164d974d21aec92beff1f8106f 100644 --- a/paddle/fluid/framework/init.cc +++ b/paddle/fluid/framework/init.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" -#include "paddle/string/piece.h" +#include "paddle/fluid/string/piece.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h index 9756754260d46519d181f95e000f39ba92d22ef0..4dc3de54deef71e42d73e0943021691c5e39f7c7 100644 --- a/paddle/fluid/framework/mixed_vector.h +++ b/paddle/fluid/framework/mixed_vector.h @@ -37,9 +37,8 @@ class Vector { // Fill vector with value. The vector size is `count`. explicit Vector(size_t count, const T& value = T()) { - if (count == 0) { - InitEmpty(); - } else { + InitEmpty(); + if (count != 0) { resize(count); T* ptr = begin(); for (size_t i = 0; i < count; ++i) { @@ -122,6 +121,10 @@ class Vector { const T* begin() const { return &this->operator[](0); } const T* end() const { return &this->operator[](size()); } + const T* cbegin() const { return begin(); } + + const T* cend() const { return end(); } + const T& back() const { auto it = end(); --it; @@ -244,7 +247,9 @@ class Vector { bool operator==(const Vector& other) const { if (size() != other.size()) return false; - for (auto it1 = begin(), it2 = other.begin(); it1 < end(); ++it1, ++it2) { + auto it1 = cbegin(); + auto it2 = other.cbegin(); + for (; it1 < cend(); ++it1, ++it2) { if (*it1 != *it2) { return false; } diff --git a/paddle/fluid/framework/mixed_vector_test.cu b/paddle/fluid/framework/mixed_vector_test.cu index a89064525661af71b22f18f835fd7b111956847b..0d5a914eac78096bd41814080bf4b2105d25e187 100644 --- a/paddle/fluid/framework/mixed_vector_test.cu +++ b/paddle/fluid/framework/mixed_vector_test.cu @@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) { for (int i = 0; i < 10; ++i) { tmp.push_back(i); } - ASSERT_EQ(tmp.size(), 10); + ASSERT_EQ(tmp.size(), 10UL); vec tmp2; tmp2 = tmp; - ASSERT_EQ(tmp2.size(), 10); + ASSERT_EQ(tmp2.size(), 10UL); for (int i = 0; i < 10; ++i) { ASSERT_EQ(tmp2[i], i); ASSERT_EQ(tmp2[i], tmp[i]); @@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) { for (int i = 0; i < 10; ++i) { tmp.push_back(i); } - ASSERT_EQ(tmp.size(), 10); + ASSERT_EQ(tmp.size(), 10UL); paddle::platform::CUDAPlace gpu(0); multiply_10<<<1, 1, 0, GetCUDAStream(gpu)>>>(tmp.MutableData(gpu)); @@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) { for (int i = 0; i < 10; ++i) { tmp.push_back(i); } - ASSERT_EQ(tmp.size(), 10); + ASSERT_EQ(tmp.size(), 10UL); paddle::platform::CUDAPlace gpu0(0); paddle::platform::SetDeviceId(0); multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0)); @@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) { ASSERT_EQ(tmp[i], i * 100); } } + +TEST(mixed_vector, InitWithCount) { + paddle::framework::Vector vec(10, 10); + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(vec[i], 10); + } +} diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 6006ed16bd4a9aece5772bad58dc75c8b0847206..91a8617d6615d31e1c7cfe1bc5457dd69dbf31f7 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -18,7 +18,7 @@ 
limitations under the License. */ #include // for call_once #include "glog/logging.h" #include "paddle/fluid/framework/threadpool.h" -#include "paddle/string/printf.h" +#include "paddle/fluid/string/printf.h" DEFINE_bool(benchmark, false, "Doing memory benchmark. It will make deleting scope synchronized, " diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h index 72b3e225bf64f889804eb5e4fab9df4653f5452b..878e53058567500aeb9fe854a1a65ed5380572a8 100644 --- a/paddle/fluid/operators/concat_op.h +++ b/paddle/fluid/operators/concat_op.h @@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel { auto ins = ctx.MultiInput("X"); auto* out = ctx.Output("Out"); int64_t axis = static_cast(ctx.Attr("axis")); - const size_t n = ins.size(); + auto place = ctx.GetPlace(); + out->mutable_data(place); + + auto out_stride = framework::stride_numel(out->dims()); + size_t output_offset = 0; - out->mutable_data(ctx.GetPlace()); - auto out_stride = framework::stride(out->dims()); - for (size_t i = 0; i < n; i++) { - auto& in = ins[i]; - auto axis_dim = in->dims()[axis]; - auto in_stride = framework::stride(in->dims()); - StridedMemcpy(ctx.device_context(), in->data(), in_stride, - in->dims(), out_stride, out->data() + output_offset); - output_offset += axis_dim * in_stride[axis]; + for (auto* in : ins) { + auto in_stride = framework::stride_numel(in->dims()); + StridedNumelCopyWithAxis(ctx.device_context(), axis, + out->data() + output_offset, out_stride, + in->data(), in_stride); + output_offset += in_stride[axis]; } } }; @@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel { auto* in = ctx.Input(framework::GradVarName("Out")); auto outs = ctx.MultiOutput(framework::GradVarName("X")); int64_t axis = static_cast(ctx.Attr("axis")); - const size_t n = outs.size(); size_t input_offset = 0; - auto in_stride = framework::stride(in->dims()); - for (size_t i = 0; i < n; i++) { - auto& out = outs[i]; + auto in_stride = framework::stride_numel(in->dims()); + + for (auto& out : outs) { out->mutable_data(ctx.GetPlace()); - size_t axis_dim = out->dims()[axis]; - auto out_stride = framework::stride(out->dims()); - StridedMemcpy(ctx.device_context(), in->data() + input_offset, - in_stride, out->dims(), out_stride, out->data()); - input_offset += axis_dim * in_stride[axis]; + auto out_stride = framework::stride_numel(out->dims()); + StridedNumelCopyWithAxis(ctx.device_context(), axis, out->data(), + out_stride, in->data() + input_offset, + in_stride); + input_offset += out_stride[axis]; } } }; diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index a72708d9baad98b670b46da66f56bf79bad951be..426dd0dc0e95b7952de1b603a943fc725889685e 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -27,7 +27,7 @@ limitations under the License. */ #include "paddle/fluid/operators/detail/grpc_server.h" #include "paddle/fluid/operators/detail/sendrecvop_utils.h" #include "paddle/fluid/operators/detail/simple_block_queue.h" -#include "paddle/string/printf.h" +#include "paddle/fluid/string/printf.h" namespace paddle { namespace operators { @@ -101,11 +101,15 @@ class ListenAndServOp : public framework::OperatorBase { // TODO(typhoonzero): change this to a while_op for every cluster-batch. 
bool exit_flag = false; + // Record received sparse variables, so that + // we can reset them after executing the optimize program + std::vector sparse_vars; while (!exit_flag) { // Get from multiple trainers, we don't care about the order in which // the gradients arrive, just add suffix 0~n and merge the gradient. rpc_service_->SetCond(0); size_t recv_var_cnt = 0; + size_t update_param_cnt = 0; int batch_barrier = 0; while (batch_barrier != fan_in) { const detail::MessageWithName &v = rpc_service_->Get(); @@ -126,13 +130,14 @@ std::string param_var_name; if (it != grad_list.end()) { param_var_name = param_list[it - grad_list.begin()]; + update_param_cnt++; + VLOG(3) << "received grad: " << grad_var_name + << " updating param: " << param_var_name; } else { - LOG(ERROR) << "grad has no paired param:" << grad_var_name; + VLOG(3) << "received variable: " << grad_var_name + << " no need to update param"; } - VLOG(3) << "received grad: " << grad_var_name - << " updating param: " << param_var_name; - - if (fan_in > 1) { + if (fan_in > 1 && !param_var_name.empty()) { grad_var_name = this->GetGradVarNameForTrainer(grad_var_name); } auto *var = recv_scope.FindVar(grad_var_name); @@ -141,23 +146,35 @@ PADDLE_THROW("Can not find server side var"); } detail::DeserializeFromMessage(v.second, dev_ctx, var); + if (var->IsType()) { + sparse_vars.push_back(var); + } } } VLOG(3) << "recv " << recv_var_cnt << " parameters for one barrier."; - // TODO(Yancey1989): merge SelectedRows variables here if (exit_flag) { rpc_service_->ShutDown(); } - + VLOG(3) << "run optimize graph..."; try { executor.Run(*program, &recv_scope, block->ID(), /*global_block*/ false /*create_local_scope*/, false /*create_vars*/); } catch (std::exception &e) { LOG(ERROR) << "run sub program error " << e.what(); } + + // Reset the received sparse variables, so that the sum operator will not + // sum the input sparse variables whose rows are empty at the next + // mini-batch. + // TODO(Yancey1989): move the reset action into an operator, we shouldn't + // have any hidden logic in the operator.
+ for (auto &var : sparse_vars) { + var->GetMutable()->mutable_rows()->clear(); + } rpc_service_->SetCond(1); - rpc_service_->WaitClientGet(recv_var_cnt); + rpc_service_->WaitClientGet(update_param_cnt); grads_counter_.clear(); + sparse_vars.clear(); } // while(true) } diff --git a/paddle/fluid/operators/multiclass_nms_op.cc b/paddle/fluid/operators/multiclass_nms_op.cc index b2934f69cc9b2e50bdd5cbdf04deeaf5ca120e2c..168e6f85d6ac9f8d6522afe871d82e708da63227 100644 --- a/paddle/fluid/operators/multiclass_nms_op.cc +++ b/paddle/fluid/operators/multiclass_nms_op.cc @@ -38,22 +38,22 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { auto box_dims = ctx->GetInputDim("BBoxes"); auto score_dims = ctx->GetInputDim("Scores"); - PADDLE_ENFORCE_EQ(box_dims.size(), 2, - "The rank of Input(BBoxes) must be 2."); + PADDLE_ENFORCE_EQ(box_dims.size(), 3, + "The rank of Input(BBoxes) must be 3."); PADDLE_ENFORCE_EQ(score_dims.size(), 3, "The rank of Input(Scores) must be 3."); - PADDLE_ENFORCE_EQ(box_dims[1], 4, + PADDLE_ENFORCE_EQ(box_dims[2], 4, "The 2nd dimension of Input(BBoxes) must be 4, " "represents the layout of coordinate " "[xmin, ymin, xmax, ymax]"); - PADDLE_ENFORCE_EQ(box_dims[0], score_dims[2], + PADDLE_ENFORCE_EQ(box_dims[1], score_dims[2], "The 1st dimensiong of Input(BBoxes) must be equal to " "3rd dimension of Input(Scores), which represents the " "predicted bboxes."); // Here the box_dims[0] is not the real dimension of output. // It will be rewritten in the computing kernel. - ctx->SetOutputDim("Out", {box_dims[0], 6}); + ctx->SetOutputDim("Out", {box_dims[1], 6}); } protected: @@ -260,15 +260,20 @@ class MultiClassNMSKernel : public framework::OpKernel { int64_t batch_size = score_dims[0]; int64_t class_num = score_dims[1]; int64_t predict_dim = score_dims[2]; + int64_t box_dim = boxes->dims()[2]; std::vector>> all_indices; std::vector batch_starts = {0}; for (int64_t i = 0; i < batch_size; ++i) { Tensor ins_score = scores->Slice(i, i + 1); ins_score.Resize({class_num, predict_dim}); + + Tensor ins_boxes = boxes->Slice(i, i + 1); + ins_boxes.Resize({predict_dim, box_dim}); + std::map> indices; int num_nmsed_out = 0; - MultiClassNMS(ctx, ins_score, *boxes, indices, num_nmsed_out); + MultiClassNMS(ctx, ins_score, ins_boxes, indices, num_nmsed_out); all_indices.push_back(indices); batch_starts.push_back(batch_starts.back() + num_nmsed_out); } @@ -282,11 +287,15 @@ class MultiClassNMSKernel : public framework::OpKernel { for (int64_t i = 0; i < batch_size; ++i) { Tensor ins_score = scores->Slice(i, i + 1); ins_score.Resize({class_num, predict_dim}); + + Tensor ins_boxes = boxes->Slice(i, i + 1); + ins_boxes.Resize({predict_dim, box_dim}); + int64_t s = batch_starts[i]; int64_t e = batch_starts[i + 1]; if (e > s) { Tensor out = outs->Slice(s, e); - MultiClassOutput(ins_score, *boxes, all_indices[i], &out); + MultiClassOutput(ins_score, ins_boxes, all_indices[i], &out); } } } @@ -303,9 +312,9 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { MultiClassNMSOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("BBoxes", - "(Tensor) A 2-D Tensor with shape [M, 4] represents the " - "predicted locations of M bounding bboxes. Each bounding box " - "has four coordinate values and the layout is " + "(Tensor) A 3-D Tensor with shape [N, M, 4] represents the " + "predicted locations of M bounding bboxes, N is the batch size. 
" + "Each bounding box has four coordinate values and the layout is " "[xmin, ymin, xmax, ymax]."); AddInput("Scores", "(Tensor) A 3-D Tensor with shape [N, C, M] represents the " diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc index a8390aa6596c69f85e3ef736dda9dd99c3fd6dba..b241f738cbf60c0698b869da771807ec7e2c33af 100644 --- a/paddle/fluid/operators/send_op.cc +++ b/paddle/fluid/operators/send_op.cc @@ -24,6 +24,22 @@ limitations under the License. */ namespace paddle { namespace operators { +static bool IsVariableInitialized(const framework::Scope& scope, + const std::string& varname) { + auto* var = scope.FindVar(varname); + PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", + varname); + if (var->IsType()) { + return var->Get().IsInitialized(); + } else if (var->IsType()) { + return var->Get().value().IsInitialized(); + } else { + PADDLE_THROW( + "Variable type in send side should be in " + "[LodTensor, SelectedRows]"); + } + return false; +} class SendOp : public framework::OperatorBase { public: @@ -51,8 +67,12 @@ class SendOp : public framework::OperatorBase { detail::RPCClient* rpc_client = client_var->GetMutable(); for (size_t i = 0; i < ins.size(); i++) { - VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; - rpc_client->AsyncSendVariable(epmap[i], ctx, scope, ins[i]); + if (IsVariableInitialized(scope, ins[i])) { + VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; + rpc_client->AsyncSendVariable(epmap[i], ctx, scope, ins[i]); + } else { + VLOG(3) << "don't send no-initialied variable: " << ins[i]; + } } PADDLE_ENFORCE(rpc_client->Wait()); diff --git a/paddle/fluid/operators/send_recv_op_test.cc b/paddle/fluid/operators/send_recv_op_test.cc index 716f687044a85d46676141ee125baf398e9e695d..37a3d246d713adb68ffa20e8b2faad32843b1eaa 100644 --- a/paddle/fluid/operators/send_recv_op_test.cc +++ b/paddle/fluid/operators/send_recv_op_test.cc @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/string/printf.h" +#include "paddle/fluid/string/printf.h" USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(listen_and_serv); diff --git a/paddle/fluid/operators/sequence_expand_op.cc b/paddle/fluid/operators/sequence_expand_op.cc index 4ebce641d2876a4f2329b6d7d7263a6b2a31fcf6..28645e01b84df3c51016a3feb237c416a0d78b49 100644 --- a/paddle/fluid/operators/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_expand_op.cc @@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasOutput("Out")); PADDLE_ENFORCE(ctx->HasInput("Y")); framework::DDim out_dim; - out_dim = ctx->GetInputDim("Y"); + auto y_dim = ctx->GetInputDim("Y"); + out_dim = ctx->GetInputDim("X"); + out_dim[0] = y_dim[0]; ctx->ShareLoD("Y", "Out"); ctx->SetOutputDim("Out", out_dim); } diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index e78218f2fb108dac5bce717e03ce0aba1ed88195..06bcf82620bec57346c30b029d23ad8417252248 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once +#include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/strided_memcpy.h" @@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto outs = ctx.MultiOutput("Out"); - auto in_stride = framework::stride(in->dims()); + auto in_stride = framework::stride_numel(in->dims()); int64_t axis = static_cast(ctx.Attr("axis")); - const size_t n = outs.size(); + auto place = ctx.GetPlace(); + size_t input_offset = 0; - for (size_t i = 0; i < n; i++) { - auto& out = outs[i]; + for (auto& out : outs) { out->mutable_data(ctx.GetPlace()); - size_t axis_dim = out->dims()[axis]; - auto out_stride = framework::stride(out->dims()); - StridedMemcpy(ctx.device_context(), in->data() + input_offset, - in_stride, out->dims(), out_stride, out->data()); - input_offset += axis_dim * in_stride[axis]; + auto out_stride = framework::stride_numel(out->dims()); + StridedNumelCopyWithAxis(ctx.device_context(), axis, out->data(), + out_stride, in->data() + input_offset, + in_stride); + input_offset += out_stride[axis]; } } }; diff --git a/paddle/fluid/operators/split_selected_rows_op.cc b/paddle/fluid/operators/split_selected_rows_op.cc index 113ce2ce109778a355130aaf686261c1f71c0980..c30280f6545ee30afb46395ac0eb5efbba462c56 100644 --- a/paddle/fluid/operators/split_selected_rows_op.cc +++ b/paddle/fluid/operators/split_selected_rows_op.cc @@ -22,7 +22,7 @@ class SplitSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker { SplitSelectedRowsOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input SelectedRows."); - AddOutput("Out", "The outputs of input SelectedRows.").AsDuplicable(); + AddOutput("Out", "The outputs of the input SelectedRows.").AsDuplicable(); AddAttr>("height_sections", "Height for each output SelectedRows.") .SetDefault(std::vector({})); @@ -56,27 +56,6 @@ class SplitSelectedRowsOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(ctx->HasInput("X"), "SplitSelectedRowsOp must has input X."); PADDLE_ENFORCE(ctx->HasOutputs("Out"), "SplitSelectedRowsOp must has output Out."); - - std::vector height_sections = - ctx->Attrs().Get>("height_sections"); - int64_t n = ctx->Outputs("Out").size(); - - std::vector outs_dims; - outs_dims.reserve(n); - - // make output dims - for (int64_t i = 0; i < n; ++i) { - auto dims = ctx->GetInputDim("X"); - if (height_sections.size()) { - PADDLE_ENFORCE_EQ( - height_sections.size(), static_cast(n), - "The size of height section should be the same with height" - " section size."); - dims[0] = height_sections[i]; - } - outs_dims.push_back(dims); - } - ctx->SetOutputsDim("Out", outs_dims); } }; diff --git a/paddle/fluid/operators/split_selected_rows_op.h b/paddle/fluid/operators/split_selected_rows_op.h index 527264bd675520a98b442380e2d1ec259964e92e..af44b09b70095cb891bd6cabceb59daf118ecd22 100644 --- a/paddle/fluid/operators/split_selected_rows_op.h +++ b/paddle/fluid/operators/split_selected_rows_op.h @@ -55,6 +55,7 @@ class SplitSelectedRowsOpKernel : public framework::OpKernel { for (size_t i = 0; i < outs_rows_idx.size(); ++i) { auto rows_idx = outs_rows_idx[i]; + outs[i]->set_height(height_sections[i]); if (rows_idx.size() > 0) { auto dims = x->GetCompleteDims(); dims[0] = rows_idx.size(); diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index 
8a99b405e266da48427fa23e9a3e67f2bc54c5a0..385124305e2d9afd62313ca46178b4916cd6405d 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src, StridedCopyDimVisitor func(dev_ctx, src, src_stride, dst_stride, dst); boost::apply_visitor(func, dst_dim); } + +// Strided numel memory copy from src to dst by the specified axis +// +// For example, for a tensor dims [4, 20, 100], the strieded numel is +// [8000, 2000, 100] +// +// NOTE: The src and dst tensor should have the same elements +// except the specified axis. +template +inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, + int64_t axis, T* dst, + const framework::DDim& dst_stride_numel, + const T* src, + const framework::DDim& src_stride_numel) { + int64_t before = dst_stride_numel[0] / dst_stride_numel[axis]; + int64_t src_after = src_stride_numel[axis]; + int64_t dst_after = dst_stride_numel[axis]; + auto place = ctx.GetPlace(); + + PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(), + "src and dst tensor should have the same dims size."); + + for (int64_t i = 0; i < axis; ++i) { + if (i < axis) { + PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis], + "src and dst should have the same elements " + "except the specified axis."); + } else if (i == axis) { + continue; + } else { + PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i], + "src and dst should have the same elements " + "except the specified axis."); + } + } + + for (int64_t i = 0; i < before; ++i) { + if (platform::is_cpu_place(place)) { + auto& cpu_place = boost::get(place); + memory::Copy(cpu_place, dst + i * dst_after, cpu_place, + src + i * src_after, sizeof(T) * src_after); + } else { +#ifdef PADDLE_WITH_CUDA + auto& gpu_place = boost::get(place); + auto& cuda_ctx = + reinterpret_cast(ctx); + memory::Copy(gpu_place, dst + i * dst_after, gpu_place, + src + i * src_after, sizeof(T) * src_after, + cuda_ctx.stream()); +#else + PADDLE_THROW("Paddle is not compiled with GPU"); +#endif + } + } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h index 5e1222c6ef723a6321392a5af7fdb558c24df32b..08218b6836e2f10ba220523d6241fb0ee4c0b3f2 100644 --- a/paddle/fluid/operators/sum_op.h +++ b/paddle/fluid/operators/sum_op.h @@ -116,7 +116,9 @@ class SumKernel : public framework::OpKernel { int64_t offset = 0; for (int i = 0; i < N; i++) { auto &sel_row = get_selected_row(i); - + if (!sel_row.value().IsInitialized() || sel_row.rows().size() == 0) { + continue; + } PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); functor(context.template device_context(), sel_row, offset, out); diff --git a/paddle/fluid/operators/target_assign_op.cc b/paddle/fluid/operators/target_assign_op.cc index 24f1b7252318f8321ef394d321aedb658398e16d..bafb830df930bc8d910898f7f2a33c61c6ff824b 100644 --- a/paddle/fluid/operators/target_assign_op.cc +++ b/paddle/fluid/operators/target_assign_op.cc @@ -22,69 +22,43 @@ class TargetAssignOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - // checkout inputs - PADDLE_ENFORCE(ctx->HasInput("EncodedGTBBox"), - "Input(EncodedGTBBox) of TargetAssignOp should not be null"); - PADDLE_ENFORCE(ctx->HasInput("GTScoreLabel"), - "Input(GTScoreLabel) 
of TargetAssignOp should not be null"); + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of TargetAssignOp should not be null"); PADDLE_ENFORCE(ctx->HasInput("MatchIndices"), "Input(MatchIndices) of TargetAssignOp should not be null"); - PADDLE_ENFORCE(ctx->HasInput("NegIndices"), - "Input(NegIndices) of TargetAssignOp should not be null"); - - // checkout outputs - PADDLE_ENFORCE( - ctx->HasOutput("PredBBoxLabel"), - "Output(PredBBoxLabel) of TargetAssignOp should not be null."); - PADDLE_ENFORCE( - ctx->HasOutput("PredBBoxWeight"), - "Output(PredBBoxWeight) of TargetAssignOp should not be null."); - PADDLE_ENFORCE( - ctx->HasOutput("PredScoreLabel"), - "Output(PredScoreLabel) of TargetAssignOp should not be null."); - PADDLE_ENFORCE( - ctx->HasOutput("PredScoreWeight"), - "Output(PredScoreWeight) of TargetAssignOp should not be null."); - - auto blabel_dims = ctx->GetInputDim("EncodedGTBBox"); - auto slabel_dims = ctx->GetInputDim("GTScoreLabel"); + + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of TargetAssignOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("OutWeight"), + "Output(OutWeight) of TargetAssignOp should not be null."); + + auto in_dims = ctx->GetInputDim("X"); auto mi_dims = ctx->GetInputDim("MatchIndices"); - auto neg_dims = ctx->GetInputDim("NegIndices"); - PADDLE_ENFORCE_EQ(blabel_dims.size(), 3UL, - "The rank of Input(EncodedGTBBox) must be 3."); - PADDLE_ENFORCE_EQ(slabel_dims.size(), 2UL, - "The rank of Input(GTScoreLabel) must be 2."); - PADDLE_ENFORCE_EQ(mi_dims.size(), 2UL, + PADDLE_ENFORCE_EQ(in_dims.size(), 3, "The rank of Input(X) must be 3."); + PADDLE_ENFORCE_EQ(mi_dims.size(), 2, "The rank of Input(MatchIndices) must be 2."); - PADDLE_ENFORCE_EQ(neg_dims.size(), 2UL, - "The rank of Input(NegIndices) must be 2."); - - PADDLE_ENFORCE_EQ(blabel_dims[0], slabel_dims[0], - "The 1st dimension (means the total number of " - "ground-truth bounding boxes) of Input(EncodedGTBBox) " - "and Input(GTScoreLabel) must be the same."); - PADDLE_ENFORCE_EQ(blabel_dims[1], mi_dims[1], - "The 2nd dimension (means the number of priod boxes) " - "of Input(EncodedGTBBox) and " - "Input(MatchIndices) must be the same."); - PADDLE_ENFORCE_EQ(blabel_dims[2], 4, - "The 3rd dimension of Input(EncodedGTBBox) must be 4."); + + if (ctx->HasInput("NegIndices")) { + auto neg_dims = ctx->GetInputDim("NegIndices"); + PADDLE_ENFORCE_EQ(neg_dims.size(), 2, + "The rank of Input(NegIndices) must be 2."); + PADDLE_ENFORCE_EQ(neg_dims[1], 1, + "The last dimenstion of Out(NegIndices) must be 1."); + } auto n = mi_dims[0]; - auto np = mi_dims[1]; - ctx->SetOutputDim("PredBBoxLabel", {n, np, 4}); - ctx->SetOutputDim("PredBBoxWeight", {n, np, 1}); - ctx->SetOutputDim("PredScoreLabel", {n, np, 1}); - ctx->SetOutputDim("PredScoreWeight", {n, np, 1}); + auto m = mi_dims[1]; + auto k = in_dims[in_dims.size() - 1]; + ctx->SetOutputDim("Out", {n, m, k}); + ctx->SetOutputDim("OutWeight", {n, m, 1}); } protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - framework::ToDataType( - ctx.Input("EncodedGTBBox")->type()), + framework::ToDataType(ctx.Input("X")->type()), ctx.device_context()); } }; @@ -93,102 +67,87 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker { public: TargetAssignOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("EncodedGTBBox", - "(LoDTensor), The encoded ground-truth bounding boxes with shape " - "[Ng, 
Np, 4], where Ng is the total number of ground-truth boxes " - "in this mini-batch, Np the number of predictions, 4 is the " - "number of coordinate in [xmin, ymin, xmax, ymax] layout."); - AddInput("GTScoreLabel", - "(LoDTensor, default LoDTensor), The input ground-truth " - "labels with shape [Ng, 1], where the Ng is the same as it in " - "the input of EncodedGTBBox."); + AddInput("X", + "(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. " + "Some elements in X will be assigned to Out based on the " + "MatchIndices and NegIndices."); AddInput("MatchIndices", "(Tensor, default Tensor), The input matched indices " - "with shape [N, Np], where N is the batch size, Np is the same " - "as it in the input of EncodedGTBBox. If MatchIndices[i][j] " - "is -1, the j-th prior box is not matched to any ground-truh " - "box in i-th instance."); + "with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity " + "of column is not matched to any entity of row in i-th instance."); AddInput("NegIndices", "(LoDTensor, default LoDTensor), The input negative example " - "indices with shape [Neg, 1], where is the total number of " - "negative example indices."); - AddAttr("background_label", - "(int, default 0), Label index of background class.") + "indices are an optional input with shape [Neg, 1], where Neg is " + "the total number of negative example indices.") + .AsDispensable(); + AddAttr("mismatch_value", + "(int, default 0), Fill this value to the " + "mismatched location.") .SetDefault(0); - AddOutput("PredBBoxLabel", - "(Tensor), The output encoded ground-truth labels " - "with shape [N, Np, 4], N is the batch size and Np, 4 is the " - "same as they in input of EncodedGTBBox. If MatchIndices[i][j] " - "is -1, the PredBBoxLabel[i][j][:] is the encoded ground-truth " - "box for background_label in i-th instance."); - AddOutput("PredBBoxWeight", - "(Tensor), The weight for PredBBoxLabel with the shape " - "of [N, Np, 1]"); - AddOutput("PredScoreLabel", - "(Tensor, default Tensor), The output score labels for " - "each predictions with shape [N, Np, 1]. If MatchIndices[i][j] " - "is -1, PredScoreLabel[i][j] = background_label."); - AddOutput("PredScoreWeight", - "(Tensor), The weight for PredScoreLabel with the shape " - "of [N, Np, 1]"); + AddOutput("Out", + "(Tensor), The output is a 3D Tensor with shape [N, P, K], " + "N and P is the same as they are in NegIndices, K is the " + "same as it in input of X. If MatchIndices[i][j] " + "is -1, the Out[i][j][0 : K] is the mismatch_value."); + AddOutput("OutWeight", + "(Tensor), The weight for output with the shape of [N, P, 1]"); AddComment(R"DOC( -This operator is, for given the encoded boxes between prior boxes and -ground-truth boxes and ground-truth class labels, to assign classification -and regression targets to each prior box as well as weights to each -prior box. The weights is used to specify which prior box would not contribute -to training loss. - -For each instance, the output `PredBBoxLabel`, `PredBBoxWeight`, -`PredScoreLabel` and `PredScoreWeight` are assigned based on `MatchIndices`. -Assumed that the row offset for each instance in `EncodedGTBBox` is called lod, -this operato assigns classification/regression targets by performing the +This operator can be, for given the target bounding boxes or labels, +to assign classification and regression targets to each prediction as well as +weights to prediction. The weights is used to specify which prediction would +not contribute to training loss. 
+ +For each instance, the output `Out` and`OutWeight` are assigned based on +`MatchIndices` and `NegIndices`. +Assumed that the row offset for each instance in `X` is called lod, +this operator assigns classification/regression targets by performing the following steps: 1. Assigning all outpts based on `MatchIndices`: If id = MatchIndices[i][j] > 0, - PredBBoxLabel[i][j] = EncodedGTBBox[lod[i] + id][j] - PredBBoxWeight[i][j] = 1. - PredScoreLabel[i][j] = GTScoreLabel[lod[i] + id] - PredScoreWeight[i][j] = 1. + Out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] + OutWeight[i][j] = 1. Otherwise, - PredBBoxLabel[j][j] = [0., 0., 0., 0.] - PredBBoxWeight[i][j] = 0. - PredScoreLabel[i][j] = background_label - PredScoreWeight[i][j] = 0. + Out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} + OutWeight[i][j] = 0. -2. Assigning PredScoreWeight based on `NegIndices`: +2. Assigning OutWeight based on `NegIndices` if `NegIndices` is provided: -Assumed that the row offset for each instance in `NegIndices` is caleed neg_lod, -for i-th instance and all ids of NegIndices in this instance: +Assumed that the row offset for each instance in `NegIndices` is called neg_lod, +for i-th instance and each `id` of NegIndices in this instance: - PredScoreLabel[i][id] = background_label - PredScoreWeight[i][id] = 1.0 + Out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} + OutWeight[i][id] = 1.0 )DOC"); } }; -template -struct NegTargetAssignFunctor { +template +struct NegTargetAssignFunctor { void operator()(const platform::CPUDeviceContext& ctx, const int* neg_indices, - const size_t* lod, const int num, const int num_prior_box, - const int background_label, int* out_label, T* out_label_wt) { - for (int i = 0; i < num; ++i) { + const size_t* lod, const int N, const int M, const int K, + const int mismatch_value, T* out, WT* out_wt) { + for (int i = 0; i < N; ++i) { for (size_t j = lod[i]; j < lod[i + 1]; ++j) { int id = neg_indices[j]; - out_label[i * num_prior_box + id] = background_label; - out_label_wt[i * num_prior_box + id] = static_cast(1.0); + int off = (i * M + id) * K; + for (int k = 0; k < K; ++k) { + out[off + k] = mismatch_value; + out_wt[off + k] = static_cast(1.0); + } } } } }; -template struct NegTargetAssignFunctor; -template struct NegTargetAssignFunctor; +template struct NegTargetAssignFunctor; +template struct NegTargetAssignFunctor; } // namespace operators } // namespace paddle @@ -198,5 +157,5 @@ REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp, ops::TargetAssignOpMaker); REGISTER_OP_CPU_KERNEL( target_assign, - ops::TargetAssignKernel, - ops::TargetAssignKernel); + ops::TargetAssignKernel, + ops::TargetAssignKernel); diff --git a/paddle/fluid/operators/target_assign_op.cu b/paddle/fluid/operators/target_assign_op.cu index 5c012d27ad82eb62d9981c8c73ef5b8cc03adc47..fa02b8aac9052ac5126d705fe16eff47385e1b05 100644 --- a/paddle/fluid/operators/target_assign_op.cu +++ b/paddle/fluid/operators/target_assign_op.cu @@ -17,39 +17,41 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -template +template __global__ void NegTargetAssignKernel(const int* neg_indices, const size_t* lod, - const int num, const int num_prior_box, - const int background_label, - int* out_label, T* out_label_wt) { + const int N, const int M, const int K, + const int mismatch_value, T* out, + WT* out_wt) { int bidx = blockIdx.x; int st = lod[bidx]; int ed = lod[bidx + 1]; - int row_start = bidx * num_prior_box; + int row_start = bidx * M; for (int i = st + threadIdx.x; i < ed; i += blockDim.x) { int id = row_start + neg_indices[i]; - out_label[id] = background_label; - out_label_wt[id] = 1.; + for (int k = 0; k < K; ++k) { + out[id * K + k] = T(mismatch_value); + out_wt[id * K + k] = WT(1.); + } } } -template -struct NegTargetAssignFunctor { +template +struct NegTargetAssignFunctor { void operator()(const platform::CUDADeviceContext& ctx, - const int* neg_indices, const size_t* lod, const int num, - const int num_prior_box, const int background_label, - int* out_label, T* out_label_wt) { + const int* neg_indices, const size_t* lod, const int N, + const int M, const int K, const int mismatch_value, T* out, + WT* out_wt) { const int block_size = 256; - const int grid_size = num; - NegTargetAssignKernel<<>>( - neg_indices, lod, num, num_prior_box, background_label, out_label, - out_label_wt); + const int grid_size = N; + NegTargetAssignKernel<<>>( + neg_indices, lod, N, M, K, mismatch_value, out, out_wt); } }; -template struct NegTargetAssignFunctor; -template struct NegTargetAssignFunctor; +template struct NegTargetAssignFunctor; +template struct NegTargetAssignFunctor; } // namespace operators } // namespace paddle @@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor; namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( target_assign, - ops::TargetAssignKernel, - ops::TargetAssignKernel); + ops::TargetAssignKernel, + ops::TargetAssignKernel); diff --git a/paddle/fluid/operators/target_assign_op.h b/paddle/fluid/operators/target_assign_op.h index 876111523af51ce67e804d6646f404c45c00af12..a1b2fe6f3505e6c96340125bc3ad184b34218f47 100644 --- a/paddle/fluid/operators/target_assign_op.h +++ b/paddle/fluid/operators/target_assign_op.h @@ -19,140 +19,113 @@ limitations under the License. 
*/ namespace paddle { namespace operators { - -template +template struct TargetAssignFunctor { - const T* gt_box_; - const int* gt_label_; + const T* in_; const int* match_indices_; const size_t* lod_; - const int background_label_; - const int64_t num_; - const int64_t num_prior_box_; - - T* out_box_; - T* out_box_wt_; - int* out_label_; - T* out_label_wt_; - - TargetAssignFunctor(const T* gt_box, const int* gt_label, - const int* match_indices, const size_t* lod, - const int background_label, const int64_t num, - const int64_t np, T* out_box, T* out_box_wt, - int* out_label, T* out_label_wt) - : gt_box_(gt_box), - gt_label_(gt_label), + const int mismatch_value_; + const int64_t N_; + const int64_t M_; + const int64_t P_; + const int64_t K_; + + T* out_; + WT* out_wt_; + + TargetAssignFunctor(const T* input, const int* match_indices, + const size_t* lod, const int mismatch_value, + const int64_t N, const int64_t M, const int64_t P, + const int64_t K, T* out, WT* out_wt) + : in_(input), match_indices_(match_indices), lod_(lod), - background_label_(background_label), - num_(num), - num_prior_box_(np), - out_box_(out_box), - out_box_wt_(out_box_wt), - out_label_(out_label), - out_label_wt_(out_label_wt) {} + mismatch_value_(mismatch_value), + N_(N), + M_(M), + P_(P), + K_(K), + out_(out), + out_wt_(out_wt) {} HOSTDEVICE void operator()(size_t i) const { - int row = i / num_prior_box_; - int col = i - row * num_prior_box_; + int h = i / M_; + int w = i - h * M_; - size_t row_off = lod_[row]; - int offset = row * num_prior_box_ + col; + size_t off = lod_[h]; + int id = match_indices_[i]; - int id = match_indices_[offset]; - T* obox = out_box_ + offset * 4; - int* olabel = out_label_ + offset; - T* obox_wt = out_box_wt_ + offset; - T* olabel_wt = out_label_wt_ + offset; + T* out = out_ + i * K_; + WT* out_wt = out_wt_ + i; if (id > -1) { - const T* gtbox = gt_box_ + ((row_off + id) * num_prior_box_ + col) * 4; - - obox[0] = gtbox[0]; - obox[1] = gtbox[1]; - obox[2] = gtbox[2]; - obox[3] = gtbox[3]; - - olabel[0] = gt_label_[row_off + id]; - obox_wt[0] = static_cast(1.); - olabel_wt[0] = static_cast(1.); + int w_off = w % P_; + const T* in = in_ + ((off + id) * P_ + w_off) * K_; + for (int64_t k = 0; k < K_; ++k) { + out[k] = in[k]; + } + out_wt[0] = static_cast(1.); } else { - obox[0] = static_cast(0.); - obox[1] = static_cast(0.); - obox[2] = static_cast(0.); - obox[3] = static_cast(0.); - - olabel[0] = background_label_; - obox_wt[0] = static_cast(0.); - olabel_wt[0] = static_cast(0.); + for (int64_t k = 0; k < K_; ++k) { + out[k] = static_cast(mismatch_value_); + } + out_wt[0] = static_cast(0.); } } }; -template +template struct NegTargetAssignFunctor { void operator()(const platform::DeviceContext& ctx, const int* neg_indices, - const size_t* lod, const int num, const int num_prior_box, - const int background_label, int* out_label, - T* out_label_wt) const; + const size_t* lod, const int N, const int M, const int K, + const int mismatch_value, T* out, WT* out_wt) const; }; -template +template class TargetAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* enc_gt_box = ctx.Input("EncodedGTBBox"); - auto* gt_label = ctx.Input("GTScoreLabel"); + auto* x = ctx.Input("X"); auto* match_indices = ctx.Input("MatchIndices"); - auto* neg_indices = ctx.Input("NegIndices"); - - auto* out_box = ctx.Output("PredBBoxLabel"); - auto* out_box_wt = ctx.Output("PredBBoxWeight"); - auto* out_label = ctx.Output("PredScoreLabel"); - 
auto* out_label_wt = ctx.Output("PredScoreWeight"); - PADDLE_ENFORCE_EQ(enc_gt_box->lod().size(), 1UL); - PADDLE_ENFORCE_EQ(gt_label->lod().size(), 1UL); - PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL); + auto* out = ctx.Output("Out"); + auto* out_wt = ctx.Output("OutWeight"); - int background_label = ctx.Attr("background_label"); + PADDLE_ENFORCE_EQ(x->lod().size(), 1UL); + int mismatch_value = ctx.Attr("mismatch_value"); - const T* box_data = enc_gt_box->data(); - const int* label_data = gt_label->data(); + const T* x_data = x->data(); const int* match_idx_data = match_indices->data(); - const int* neg_idx_data = neg_indices->data(); - T* obox_data = out_box->mutable_data(ctx.GetPlace()); - T* obox_wt_data = out_box_wt->mutable_data(ctx.GetPlace()); - int* olabel_data = out_label->mutable_data(ctx.GetPlace()); - T* olabel_wt_data = out_label_wt->mutable_data(ctx.GetPlace()); + T* out_data = out->mutable_data(ctx.GetPlace()); + WT* out_wt_data = out_wt->mutable_data(ctx.GetPlace()); - int64_t num = match_indices->dims()[0]; - int64_t num_prior_box = match_indices->dims()[1]; + int64_t n = match_indices->dims()[0]; + int64_t m = match_indices->dims()[1]; + int64_t p = x->dims()[1]; + int64_t k = x->dims()[2]; - auto gt_lod = enc_gt_box->lod().back(); - auto gt_label_lod = gt_label->lod().back(); - auto neg_lod = neg_indices->lod().back(); - for (size_t i = 0; i < gt_lod.size(); ++i) { - PADDLE_ENFORCE_EQ(gt_lod.data()[i], gt_label_lod.data()[i]); - } - - size_t* gt_lod_data = gt_lod.MutableData(ctx.GetPlace()); - size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace()); + auto x_lod = x->lod().back(); + size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace()); - TargetAssignFunctor functor(box_data, label_data, match_idx_data, - gt_lod_data, background_label, num, - num_prior_box, obox_data, obox_wt_data, - olabel_data, olabel_wt_data); + TargetAssignFunctor functor(x_data, match_idx_data, x_lod_data, + mismatch_value, n, m, p, k, out_data, + out_wt_data); auto& device_ctx = ctx.template device_context(); - platform::ForRange for_range(device_ctx, - num * num_prior_box); + platform::ForRange for_range(device_ctx, n * m); for_range(functor); - NegTargetAssignFunctor neg_trg_functor; - neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, num, num_prior_box, - background_label, olabel_data, olabel_wt_data); + auto* neg_indices = ctx.Input("NegIndices"); + if (neg_indices) { + PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL); + const int* neg_idx_data = neg_indices->data(); + auto neg_lod = neg_indices->lod().back(); + size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace()); + NegTargetAssignFunctor neg_trg_functor; + neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k, + mismatch_value, out_data, out_wt_data); + } } }; diff --git a/paddle/fluid/platform/cpu_info_test.cc b/paddle/fluid/platform/cpu_info_test.cc index d1fdba13b80629902faf2bcebb646572b36b459e..046758c594ff59ade06ae9904bb67b8418a539fe 100644 --- a/paddle/fluid/platform/cpu_info_test.cc +++ b/paddle/fluid/platform/cpu_info_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/platform/cpu_info.h" -#include "paddle/string/printf.h" +#include "paddle/fluid/string/printf.h" #include #include diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index b22893c0a56f39347894924b3cd6ea64180aa8b6..86e1792801598793f4963f70b4e4c927a9d71893 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -23,8 +23,8 @@ limitations under the License. */ #include #include "paddle/fluid/platform/macros.h" -#include "paddle/string/printf.h" -#include "paddle/string/to_string.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/fluid/string/to_string.h" #ifdef __GNUC__ #include // for __cxa_demangle diff --git a/paddle/fluid/platform/enforce_test.cc b/paddle/fluid/platform/enforce_test.cc index 896a9a04eca80f22bebf87859d277458d9bdb092..baa34a5c7b453cf4e142147b22515e08d6ea7127 100644 --- a/paddle/fluid/platform/enforce_test.cc +++ b/paddle/fluid/platform/enforce_test.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include "gtest/gtest.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/string/piece.h" +#include "paddle/fluid/string/piece.h" using StringPiece = paddle::string::Piece; using paddle::string::HasPrefix; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 8924aabd17b18504a863990e1196e664905bbb52..85a6700a61405b2e3b40455fe96b54e810ff86b5 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -35,7 +35,7 @@ limitations under the License. */ #include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/pybind.h" #include "paddle/fluid/pybind/tensor_py.h" -#include "paddle/string/to_string.h" +#include "paddle/fluid/string/to_string.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/operators/nccl/nccl_gpu_common.h" diff --git a/paddle/string/.clang-format b/paddle/fluid/string/.clang-format similarity index 100% rename from paddle/string/.clang-format rename to paddle/fluid/string/.clang-format diff --git a/paddle/string/CMakeLists.txt b/paddle/fluid/string/CMakeLists.txt similarity index 100% rename from paddle/string/CMakeLists.txt rename to paddle/fluid/string/CMakeLists.txt diff --git a/paddle/string/piece.cc b/paddle/fluid/string/piece.cc similarity index 99% rename from paddle/string/piece.cc rename to paddle/fluid/string/piece.cc index 330ca5f0155e92e34563ca16fffa482502428fda..560413dff1f0e41789275701fd6c61394e95e13c 100644 --- a/paddle/string/piece.cc +++ b/paddle/fluid/string/piece.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/string/piece.h" +#include "piece.h" #include diff --git a/paddle/string/piece.h b/paddle/fluid/string/piece.h similarity index 99% rename from paddle/string/piece.h rename to paddle/fluid/string/piece.h index dcef9791a70884eed04078bf9f97be9617e9d2b0..f2bb6b2c76164fbe63a9b53ec41385014f240ffd 100644 --- a/paddle/string/piece.h +++ b/paddle/fluid/string/piece.h @@ -28,7 +28,7 @@ namespace string { // its syntax is simple as it doesn't own/manage the string, it is // cheap to construct Pieces and pass them around. class Piece { -public: + public: static const size_t npos = static_cast(-1); // We provide non-explicit singleton constructors so users can @@ -55,7 +55,7 @@ public: // Return a string that contains the copy of the referenced data. 
std::string ToString() const { return std::string(data_, size_); } -private: + private: const char* data_; size_t size_; diff --git a/paddle/string/piece_test.cc b/paddle/fluid/string/piece_test.cc similarity index 99% rename from paddle/string/piece_test.cc rename to paddle/fluid/string/piece_test.cc index 250f26d61f8efe49861a78b83f944fa06fa5ec46..fc17d315b9df99374bb3a7b0a94e385a4abb7b5e 100644 --- a/paddle/string/piece_test.cc +++ b/paddle/fluid/string/piece_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/string/piece.h" +#include "paddle/fluid/string/piece.h" #include diff --git a/paddle/string/printf.h b/paddle/fluid/string/printf.h similarity index 97% rename from paddle/string/printf.h rename to paddle/fluid/string/printf.h index 03809d22092012c7815eb6a89b9fb8bc9eed549b..b55ae21b87ee6a3d158812e5ff872a03288abd20 100644 --- a/paddle/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -71,7 +71,7 @@ #include #include -#include "paddle/string/tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat +#include "tinyformat/tinyformat.h" // https://github.com/c42f/tinyformat namespace paddle { namespace string { diff --git a/paddle/string/printf_test.cc b/paddle/fluid/string/printf_test.cc similarity index 85% rename from paddle/string/printf_test.cc rename to paddle/fluid/string/printf_test.cc index 9815f29bdd82df9062dd5cacf9c3121bb65e3be9..6ca59bdefdbe2b23b49c844e893adb939c3d5a6f 100644 --- a/paddle/string/printf_test.cc +++ b/paddle/fluid/string/printf_test.cc @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/string/printf.h" +#include "printf.h" #include @@ -24,6 +24,6 @@ TEST(StringPrintf, StringPrintf) { long hour = 14; int min = 44; EXPECT_EQ(std::string("Wednesday, July 27, 14:44"), - paddle::string::Sprintf( - "%s, %s %d, %.2d:%.2d", weekday, month, day, hour, min)); + paddle::string::Sprintf("%s, %s %d, %.2d:%.2d", weekday, month, day, + hour, min)); } diff --git a/paddle/string/tinyformat/tinyformat.h b/paddle/fluid/string/tinyformat/tinyformat.h similarity index 92% rename from paddle/string/tinyformat/tinyformat.h rename to paddle/fluid/string/tinyformat/tinyformat.h index 270198dc52c14175f7ab91395668a7828fe2f075..d1a2c47f1a9f258aefbdcce88513e05dda308fa0 100644 --- a/paddle/string/tinyformat/tinyformat.h +++ b/paddle/fluid/string/tinyformat/tinyformat.h @@ -147,7 +147,7 @@ namespace detail { // Test whether type T1 is convertible to type T2 template struct is_convertible { -private: + private: // two types of different size struct fail { char dummy[2]; @@ -160,7 +160,7 @@ private: static succeed tryConvert(const T2 &); static const T1 &makeT1(); -public: + public: // Standard trick: the (...) version of tryConvert will be chosen from // the overload set only if the version taking a T2 doesn't match. // Then we compare the sizes of the return types to check which @@ -170,8 +170,7 @@ public: // Format the value by casting to type fmtT. This default implementation // should never be called. -template ::value> struct formatValueAsType { static void invoke(std::ostream & /*out*/, const T & /*value*/) { assert(0); } @@ -241,11 +240,8 @@ TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char) /// operator<< to format the type T, with special cases for the %c and %p /// conversions. 
template -inline void formatValue(std::ostream &out, - const char * /*fmtBegin*/, - const char *fmtEnd, - int ntrunc, - const T &value) { +inline void formatValue(std::ostream &out, const char * /*fmtBegin*/, + const char *fmtEnd, int ntrunc, const T &value) { // The mess here is to support the %c and %p conversions: if these // conversions are active we try to convert the type to a char or const // void* respectively and format that instead of the value itself. For the @@ -267,25 +263,22 @@ inline void formatValue(std::ostream &out, } // Overloaded version for char types to support printing as an integer -#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ - inline void formatValue(std::ostream &out, \ - const char * /*fmtBegin*/, \ - const char *fmtEnd, \ - int /**/, \ - charType value) { \ - switch (*(fmtEnd - 1)) { \ - case 'u': \ - case 'd': \ - case 'i': \ - case 'o': \ - case 'X': \ - case 'x': \ - out << static_cast(value); \ - break; \ - default: \ - out << value; \ - break; \ - } \ +#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ + inline void formatValue(std::ostream &out, const char * /*fmtBegin*/, \ + const char *fmtEnd, int /**/, charType value) { \ + switch (*(fmtEnd - 1)) { \ + case 'u': \ + case 'd': \ + case 'i': \ + case 'o': \ + case 'X': \ + case 'x': \ + out << static_cast(value); \ + break; \ + default: \ + out << value; \ + break; \ + } \ } // per 3.9.1: char, signed char and unsigned char are all distinct types TINYFORMAT_DEFINE_FORMATVALUE_CHAR(char) @@ -482,7 +475,7 @@ namespace detail { // each argument to be allocated as a homogenous array inside FormatList // whereas a naive implementation based on inheritance does not. class FormatArg { -public: + public: FormatArg() {} template @@ -491,22 +484,17 @@ public: m_formatImpl(&formatImpl), m_toIntImpl(&toIntImpl) {} - void format(std::ostream &out, - const char *fmtBegin, - const char *fmtEnd, + void format(std::ostream &out, const char *fmtBegin, const char *fmtEnd, int ntrunc) const { m_formatImpl(out, fmtBegin, fmtEnd, ntrunc, m_value); } int toInt() const { return m_toIntImpl(m_value); } -private: + private: template - static void formatImpl(std::ostream &out, - const char *fmtBegin, - const char *fmtEnd, - int ntrunc, - const void *value) { + static void formatImpl(std::ostream &out, const char *fmtBegin, + const char *fmtEnd, int ntrunc, const void *value) { formatValue(out, fmtBegin, fmtEnd, ntrunc, *static_cast(value)); } @@ -516,11 +504,8 @@ private: } const void *m_value; - void (*m_formatImpl)(std::ostream &out, - const char *fmtBegin, - const char *fmtEnd, - int ntrunc, - const void *value); + void (*m_formatImpl)(std::ostream &out, const char *fmtBegin, + const char *fmtEnd, int ntrunc, const void *value); int (*m_toIntImpl)(const void *value); }; @@ -569,12 +554,10 @@ inline const char *printFormatStringLiteral(std::ostream &out, // necessary to pull out variable width and precision . The function returns a // pointer to the character after the end of the current format spec. 
inline const char *streamStateFromFormat(std::ostream &out, - bool &spacePadPositive, - int &ntrunc, + bool &spacePadPositive, int &ntrunc, const char *fmtStart, const detail::FormatArg *formatters, - int &argIndex, - int numFormatters) { + int &argIndex, int numFormatters) { if (*fmtStart != '%') { TINYFORMAT_ERROR( "tinyformat: Not enough conversion specifiers in format string"); @@ -750,10 +733,8 @@ inline const char *streamStateFromFormat(std::ostream &out, } //------------------------------------------------------------------------------ -inline void formatImpl(std::ostream &out, - const char *fmt, - const detail::FormatArg *formatters, - int numFormatters) { +inline void formatImpl(std::ostream &out, const char *fmt, + const detail::FormatArg *formatters, int numFormatters) { // Saved stream state std::streamsize origWidth = out.width(); std::streamsize origPrecision = out.precision(); @@ -765,13 +746,9 @@ inline void formatImpl(std::ostream &out, fmt = printFormatStringLiteral(out, fmt); bool spacePadPositive = false; int ntrunc = -1; - const char *fmtEnd = streamStateFromFormat(out, - spacePadPositive, - ntrunc, - fmt, - formatters, - argIndex, - numFormatters); + const char *fmtEnd = + streamStateFromFormat(out, spacePadPositive, ntrunc, fmt, formatters, + argIndex, numFormatters); if (argIndex >= numFormatters) { // Check args remain after reading any variable width/precision TINYFORMAT_ERROR("tinyformat: Not enough format arguments"); @@ -820,15 +797,14 @@ inline void formatImpl(std::ostream &out, /// information has been stripped from the arguments, leaving just enough of a /// common interface to perform formatting as required. class FormatList { -public: + public: FormatList(detail::FormatArg *formatters, int N) : m_formatters(formatters), m_N(N) {} - friend void vformat(std::ostream &out, - const char *fmt, + friend void vformat(std::ostream &out, const char *fmt, const FormatList &list); -private: + private: const detail::FormatArg *m_formatters; int m_N; }; @@ -841,7 +817,7 @@ namespace detail { // Format list subclass with fixed storage to avoid dynamic allocation template class FormatListN : public FormatList { -public: + public: template FormatListN(const Args &... args) : FormatList(&m_formatterStore[0], N), @@ -849,14 +825,14 @@ public: static_assert(sizeof...(args) == N, "Number of args must be N"); } -private: + private: FormatArg m_formatterStore[N]; }; // Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard template <> class FormatListN<0> : public FormatList { -public: + public: FormatListN() : FormatList(0, 0) {} }; diff --git a/paddle/string/to_string.h b/paddle/fluid/string/to_string.h similarity index 100% rename from paddle/string/to_string.h rename to paddle/fluid/string/to_string.h diff --git a/paddle/string/to_string_test.cc b/paddle/fluid/string/to_string_test.cc similarity index 96% rename from paddle/string/to_string_test.cc rename to paddle/fluid/string/to_string_test.cc index 05650ee8f14f2b050dec07803cdbfa3893eb79d7..1e890f572eb842eba49b859807083f4e5af57df8 100644 --- a/paddle/string/to_string_test.cc +++ b/paddle/fluid/string/to_string_test.cc @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/string/to_string.h" +#include "to_string.h" #include constexpr char kOutputString[] = "User Defined Output"; class UserDefinedClass { -public: + public: }; std::ostream& operator<<(std::ostream& s, const UserDefinedClass& ins) { diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2f8dd48efe1d7252f5befd2f682219661a60ba03..1486d5ed2579a49a4722a8b0abdfdba6bf196615 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -115,8 +115,8 @@ EOF -DWITH_AVX=${WITH_AVX:-ON} \ -DWITH_SWIG_PY=ON \ -DWITH_STYLE_CHECK=OFF - make -j `nproc` gen_proto_py - make -j `nproc` paddle_python + make -j `nproc` gen_proto_py framework_py_proto + make -j `nproc` copy_paddle_pybind make -j `nproc` paddle_docs paddle_docs_cn paddle_api_docs popd fi diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index 486c094a6ac3c9ec260e231ba2333c718dfc164b..aa223d87ba8ff09e774172d049f73039d5c9fc28 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -6,9 +6,9 @@ mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON -make -j `nproc` gen_proto_py -make -j `nproc` paddle_python +cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON -DWITH_STYLE_CHECK=OFF +make -j `nproc` gen_proto_py framework_py_proto +make -j `nproc` copy_paddle_pybind make -j `nproc` paddle_docs paddle_docs_cn paddle_api_docs # check websites for broken links diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py index cd89dba72db2470c1d5bafd4523c21c49e7e2b53..e4675e24b178b2f1745c2b38270ac381ebfe6550 100644 --- a/python/paddle/v2/fluid/distribute_transpiler.py +++ b/python/paddle/v2/fluid/distribute_transpiler.py @@ -33,6 +33,57 @@ class VarBlock: return "%s:%d:%d" % (self.varname, self.offset, self.size) +class UnionFind(object): + """ Union-find data struct. + + Union-find is a data struct that keeps track of a set of elements partitioned + into a number of disjoint (non-overlapping) subsets. + + Reference: + https://en.wikipedia.org/wiki/Disjoint-set_data_structure + + Args: + elements(list): The initialize element list. + """ + + def __init__(self, elementes=None): + self._parents = [] # index -> parent index + self._index = {} # element -> index + self._curr_idx = 0 + if not elementes: + elementes = [] + for ele in elementes: + self._parents.append(self._curr_idx) + self._index.update({ele: self._curr_idx}) + self._curr_idx += 1 + + def find(self, x): + # Find the root index of given element x, + # execute the path compress while findind the root index + if not x in self._index: + return -1 + idx = self._index[x] + while idx != self._parents[idx]: + t = self._parents[idx] + self._parents[idx] = self._parents[t] + idx = t + return idx + + def union(self, x, y): + # Union two given element + x_root = self.find(x) + y_root = self.find(y) + + if x_root == y_root: + return + self._parents[x_root] = y_root + + def is_connected(self, x, y): + # If two given elements have the same root index, + # then they are connected. 
+ return self.find(x) == self.find(y) + + def same_or_split_var(p_name, var_name): return p_name == var_name or p_name.startswith(var_name + ".block") @@ -140,6 +191,7 @@ class DistributeTranspiler: for b in param_blocks: varname, block_id, _ = b.split(":") send_outputs.append(param_var_mapping[varname][int(block_id)]) + # let send_op know which endpoint to send which var to, eplist has the same # order as send_inputs. eplist = split_method(send_inputs, pserver_endpoints) @@ -178,6 +230,21 @@ class DistributeTranspiler: outputs={"Out": [orig_param]}, attrs={"axis": 0}) + self.lr_param_mapping = self._create_lr_param_mapping() + + def _create_lr_param_mapping(self): + lr_mapping = dict() + for _, opt_op in enumerate(self.optimize_ops): + if not opt_op.inputs or not opt_op.inputs.has_key("LearningRate") \ + or not opt_op.inputs.has_key("Param"): + continue + lr = opt_op.inputs["LearningRate"].name + param = opt_op.inputs["Param"].name + if not lr_mapping.has_key(lr): + lr_mapping.update({lr: list()}) + lr_mapping[lr].append(param) + return lr_mapping + def _create_vars_from_blocklist(self, program, block_list): # Create respective variables using the block_list block_map = dict() @@ -208,6 +275,7 @@ class DistributeTranspiler: name="%s.block%d" % (varname, i), psersistable=False, dtype=orig_var.dtype, + type=orig_var.type, shape=splited_shape) # flattend splited var var_mapping[varname].append(var) return var_mapping @@ -269,6 +337,7 @@ class DistributeTranspiler: name="%s.trainer_%d" % (var.name, i), psersistable=var.persistable, dtype=var.dtype, + type=var.type, shape=var.shape) var_list.append(var_each) return var_list @@ -300,52 +369,15 @@ class DistributeTranspiler: pass return orig_shape - def _op_input_var(self, op, varname): - pass - - def _is_op_on_pserver(self, endpoint, all_ops, idx): - """ - Recursively check if the op need to run on current server. - Assume that ops are in the execution order. - """ - param_names = [ - p.name for p in self.param_grad_ep_mapping[endpoint]["params"] - ] - op = all_ops[idx] - input_names = set(op.input_names) - # TODO(typhoonzero): using Param and Grad input name to identify - # that the operator is an optimization operator, need a better way. - if "Param" in input_names: - if op.input("Param")[0] in param_names: - return True - else: - for n in param_names: - if same_or_split_var(n, op.input("Param")[0]) \ - and n != op.input("Param")[0]: - return True - return False - else: - j = idx - 1 - while j >= 0: - prev_op = all_ops[j] - # prev_output_names = [o.name for o in prev_op.outputs.values()] - # prev_input_names = [o.name for o in prev_op.inputs.values()] - # NOTE(typhoonzero): consider list input/output - prev_output_names = prev_op.desc.output_arg_names() - prev_input_names = prev_op.desc.input_arg_names() - found1 = False - found2 = False - for varname in op.desc.input_arg_names(): - if varname in prev_output_names: - found1 = self._is_op_on_pserver(endpoint, all_ops, j) - # later ops may produce output for prev op's next batch use. 
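
For reference, the UnionFind helper added by this patch can be exercised on its own. Below is a minimal, hypothetical usage sketch: the string element values are arbitrary placeholders (in the transpiler the elements are operator objects), but the calls mirror the methods defined above.

uf = UnionFind(["w1", "w2", "w3", "b1"])
uf.union("w1", "w2")                     # put w1 and w2 into the same subset
uf.union("w2", "w3")                     # transitively links w1, w2 and w3
assert uf.is_connected("w1", "w3")       # same root after path compression
assert not uf.is_connected("w1", "b1")   # b1 was never unioned with the others
assert uf.find("unknown") == -1          # unregistered elements return -1
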
- for varname in op.desc.output_arg_names(): - if varname in prev_input_names: - found2 = self._is_op_on_pserver(endpoint, all_ops, j) - if found1 or found2: - return True - j -= 1 - return False + def _fetch_var_names(self, param_dict): + res = [] + if not param_dict: + return res + for _, values in param_dict.iteritems(): + if not isinstance(values, list): + values = [values] + res += [v.name for v in values] + return res def _append_pserver_ops(self, optimize_block, opt_op, endpoint): program = optimize_block.program @@ -363,11 +395,7 @@ class DistributeTranspiler: # do not append this op if current endpoint # is not dealing with this grad block return - merged_var = program.global_block().create_var( - name=grad_block.name, - persistable=grad_block.persistable, - dtype=grad_block.dtype, - shape=grad_block.shape) + merged_var = program.global_block().vars[grad_block.name] # append merging ops if trainers > 1 if self.trainers > 1: vars2merge = self._create_var_for_trainers( @@ -398,13 +426,19 @@ class DistributeTranspiler: shape=param_block.shape) new_inputs[key] = tmpvar + elif key == "LearningRate": + # leraning rate variable has already be created by non-optimize op, + # don't create it once again. + new_inputs[key] = program.global_block().vars[opt_op.input(key)[ + 0]] for key in opt_op.input_names: - if key in ["Param", "Grad"]: + new_shape = None + if key in ["Param", "Grad", "LearningRate"]: continue + var = program.global_block().vars[opt_op.input(key)[0]] # update accumulator variable shape param_shape = new_inputs["Param"].shape - var = program.global_block().vars[opt_op.input(key)[0]] new_shape = self._get_optimizer_input_shape(opt_op.type, key, var.shape, param_shape) tmpvar = program.global_block().create_var( @@ -415,12 +449,11 @@ class DistributeTranspiler: new_inputs[key] = tmpvar # change output's ParamOut variable - outputs = self._get_output_map_from_op(program.global_block(), opt_op) - outputs["ParamOut"] = new_inputs["Param"] + opt_op.outputs["ParamOut"] = new_inputs["Param"] optimize_block.append_op( type=opt_op.type, inputs=new_inputs, - outputs=outputs, + outputs=opt_op.outputs, attrs=opt_op.attrs) def _append_pserver_non_opt_ops(self, optimize_block, opt_op): @@ -428,11 +461,10 @@ class DistributeTranspiler: # Append the ops for parameters that do not need to be optimized/updated inputs = self._get_input_map_from_op(self.program.global_block().vars, opt_op) - for var in inputs.itervalues(): - if type(var) == list: - varlist = var - else: - varlist = [var] + for varlist in inputs.itervalues(): + if not isinstance(varlist, list): + varlist = [varlist] + for var in varlist: if not program.global_block().vars.has_key(var.name): program.global_block().create_var( @@ -444,12 +476,70 @@ class DistributeTranspiler: outputs = self._get_output_map_from_op(self.program.global_block().vars, opt_op) + for varlist in outputs.itervalues(): + if not isinstance(varlist, list): + varlist = [varlist] + + for var in varlist: + program.global_block().create_var( + name=var.name, + persistable=var.persistable, + dtype=var.dtype, + shape=var.shape) + optimize_block.append_op( type=opt_op.type, inputs=inputs, outputs=outputs, attrs=opt_op.attrs) + def _is_op_connected(self, op1, op2): + # If one op's input is another op's output or + # one op's output is another op's input, we say + # the two operator is connected. 
+ op1_input_names = self._fetch_var_names(op1.inputs) + op1_output_names = self._fetch_var_names(op1.outputs) + + op2_input_names = self._fetch_var_names(op2.inputs) + op2_output_names = self._fetch_var_names(op2.outputs) + if set(op1_output_names) & set(op2_input_names) or \ + set(op1_input_names) & set(op2_output_names): + return True + return False + + def _create_ufind(self, optimize_ops): + # Create a unit find data struct by optimize ops + ufind = UnionFind(optimize_ops) + for i in xrange(len(optimize_ops)): + for j in xrange(i, len(optimize_ops)): + op1 = optimize_ops[i] + op2 = optimize_ops[j] + if self._is_op_connected(op1, op2): + ufind.union(op1, op2) + return ufind + + def _is_opt_op(self, op): + # NOTE: It's a HACK implement. + # optimize op: SGDOptimize, MomentumOptimizer, AdamOptimizer and etc... + if op.inputs and op.inputs.has_key("Param") \ + and op.inputs.has_key("LearningRate"): + return True + return False + + def _is_opt_op_on_pserver(self, endpoint, op): + param_names = [ + p.name for p in self.param_grad_ep_mapping[endpoint]["params"] + ] + if op.inputs["Param"].name in param_names: + return True + else: + for n in param_names: + param = op.inputs["Param"].name + if same_or_split_var(n, param) and n != op.inputs["Param"].name: + return True + return False + return False + def get_pserver_program(self, endpoint): """ Get pserver side program using the endpoint @@ -469,26 +559,38 @@ class DistributeTranspiler: pserver_program.global_block().create_var( name=v.name, persistable=True, dtype=v.dtype, shape=v.shape) for trainer_id in xrange(self.trainers): - print("create variable for program: %s.trainer_%d" % - (v.name, trainer_id)) pserver_program.global_block().create_var( name="%s.trainer_%d" % (v.name, trainer_id), persistable=True, dtype=v.dtype, shape=v.shape) + # step6 optimize_block = pserver_program.create_block(0) - # Iterate through the ops and append ops as needed - for idx, opt_op in enumerate(self.optimize_ops): - is_op_on_pserver = self._is_op_on_pserver(endpoint, - self.optimize_ops, idx) - if not is_op_on_pserver: - continue - if "Grad" in opt_op.desc.input_arg_names(): - self._append_pserver_ops(optimize_block, opt_op, endpoint) - else: - self._append_pserver_non_opt_ops(optimize_block, opt_op) - + # step 6.1 + # Create a union-find data struct by optimize ops, + # If two ops are connected, we could add these two ops + # into one set. + ufind = self._create_ufind(self.optimize_ops) + # step 6.2 + # Iterate through the ops and append optimize op which + # located on current pserver + opt_op_on_pserver = [] + for _, op in enumerate(self.optimize_ops): + if self._is_opt_op(op) and self._is_opt_op_on_pserver(endpoint, op): + opt_op_on_pserver.append(op) + # step 6.3 + # Iterate through the ops, and if an op and the optimize ops + # which located on current pserver are in one set, then + # append it into the sub program. 
+        for _, op in enumerate(self.optimize_ops):
+            for _, opt_op in enumerate(opt_op_on_pserver):
+                if ufind.is_connected(op, opt_op):
+                    if self._is_opt_op(op):
+                        self._append_pserver_ops(optimize_block, op, endpoint)
+                    else:
+                        self._append_pserver_non_opt_ops(optimize_block, op)
+                    break
         # Append the listen_and_serv op
         pserver_program.global_block().append_op(
             type="listen_and_serv",
diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py
index f4fb2ca2798ab8ea8c7c634194d2e0c1371a2b93..cfbbf710b6ac63b9a0fe7d51b0d1940532e948fc 100644
--- a/python/paddle/v2/fluid/layers/__init__.py
+++ b/python/paddle/v2/fluid/layers/__init__.py
@@ -16,6 +16,8 @@ import ops
 from ops import *
 import nn
 from nn import *
+import detection
+from detection import *
 import io
 from io import *
 import tensor
@@ -31,6 +33,7 @@ from detection import *
 __all__ = []
 __all__ += math_op_patch.__all__
+__all__ += detection.__all__
 __all__ += nn.__all__
 __all__ += io.__all__
 __all__ += tensor.__all__
diff --git a/python/paddle/v2/fluid/layers/detection.py b/python/paddle/v2/fluid/layers/detection.py
index 657f3e22fb5a7e6727531218fff57339318e4738..5ec5a0d1e0c55646a32706e0acd78970febe263d 100644
--- a/python/paddle/v2/fluid/layers/detection.py
+++ b/python/paddle/v2/fluid/layers/detection.py
@@ -22,7 +22,106 @@ from ops import reshape
 from operator import mul
 import math
-__all__ = ['prior_box', ]
+__all__ = [
+    'detection_output',
+    'prior_box',
+]
+
+
+def detection_output(scores,
+                     loc,
+                     prior_box,
+                     prior_box_var,
+                     background_label=0,
+                     nms_threshold=0.3,
+                     nms_top_k=400,
+                     keep_top_k=200,
+                     score_threshold=0.01,
+                     nms_eta=1.0):
+    """
+    **Detection Output Layer**
+
+    This layer applies NMS to the output of the network and computes the
+    predicted bounding box locations. The output of this layer may be empty
+    if there is no valid bounding box.
+
+    Args:
+        scores(Variable): A 3-D Tensor with shape [N, C, M] that represents
+            the predicted confidences. N is the batch size, C is the class
+            number, and M is the number of bounding boxes. For each category
+            there are M scores, one for each of the M bounding boxes.
+        loc(Variable): A 3-D Tensor with shape [N, M, 4] that represents the
+            predicted locations of the M bounding boxes. N is the batch size,
+            and each bounding box has four coordinate values with the layout
+            [xmin, ymin, xmax, ymax].
+        prior_box(Variable): A 2-D Tensor with shape [M, 4] that holds M boxes;
+            each box is represented as [xmin, ymin, xmax, ymax].
+            [xmin, ymin] is the top-left coordinate of the anchor box; if the
+            input is an image feature map, it is close to the origin of the
+            coordinate system. [xmax, ymax] is the bottom-right coordinate of
+            the anchor box.
+        prior_box_var(Variable): A 2-D Tensor with shape [M, 4] that holds M
+            groups of variances.
+        background_label(float): The index of the background label; the
+            background label will be ignored. If set to -1, all categories
+            will be considered.
+        nms_threshold(float): The threshold to be used in NMS.
+        nms_top_k(int): Maximum number of detections to be kept according
+            to the confidences after filtering detections based on
+            score_threshold.
+        keep_top_k(int): Total number of bounding boxes to be kept per image
+            after the NMS step. -1 means keeping all bounding boxes after the
+            NMS step.
+        score_threshold(float): Threshold used to filter out bounding boxes
+            with a low confidence score. If not provided, all boxes are
+            considered.
+        nms_eta(float): The parameter for adaptive NMS.
+
+    Returns:
+        A Tensor containing the detected bounding boxes.
+ + Examples: + .. code-block:: python + + pb = layers.data(name='prior_box', shape=[10, 4], + append_batch_size=False, dtype='float32') + pbv = layers.data(name='prior_box_var', shape=[10, 4], + append_batch_size=False, dtype='float32') + loc = layers.data(name='target_box', shape=[21, 4], + append_batch_size=False, dtype='float32') + scores = layers.data(name='scores', shape=[2, 21, 10], + append_batch_size=False, dtype='float32') + nmsed_outs = fluid.layers.detection_output(scores=scores, + loc=loc, + prior_box=pb, + prior_box_var=pbv) + """ + + helper = LayerHelper("detection_output", **locals()) + decoded_box = helper.create_tmp_variable(dtype=loc.dtype) + helper.append_op( + type="box_coder", + inputs={ + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': loc + }, + outputs={'OutputBox': decoded_box}, + attrs={'code_type': 'decode_center_size'}) + nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) + + helper.append_op( + type="multiclass_nms", + inputs={'Scores': scores, + 'BBoxes': decoded_box}, + outputs={'Out': nmsed_outs}, + attrs={ + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0 + }) + return nmsed_outs def prior_box(inputs, @@ -47,7 +146,7 @@ def prior_box(inputs, Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. The details of this algorithm, please refer the section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector)`_ . - + Args: inputs(list): The list of input Variables, the format of all Variables is NCHW. image(Variable): The input image data of PriorBoxOp, the layout is NCHW. @@ -73,7 +172,7 @@ def prior_box(inputs, max_sizes(list, optional, default=None): If `len(inputs) <=2`, max_sizes must be set up, and the length of min_sizes should equal to the length of inputs. name(str, optional, None): Name of the prior box layer. - + Returns: boxes(Variable): the output prior boxes of PriorBoxOp. The layout is [num_priors, 4]. num_priors is the total box count of each @@ -81,20 +180,19 @@ def prior_box(inputs, Variances(Variable): the expanded variances of PriorBoxOp. The layout is [num_priors, 4]. num_priors is the total box count of each position of inputs - + Examples: .. 
code-block:: python - - prior_boxes( + + prior_box( inputs = [conv1, conv2, conv3, conv4, conv5, conv6], image = data, min_ratio = 20, # 0.20 max_ratio = 90, # 0.90 - steps = [8., 16., 32., 64., 100., 300.], - aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], - base_size = 300, offset = 0.5, + base_size = 300, variance = [0.1,0.1,0.1,0.1], + aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], flip=True, clip=True) """ diff --git a/python/paddle/v2/fluid/layers/math_op_patch.py b/python/paddle/v2/fluid/layers/math_op_patch.py index 9b5f22759cf5ccba841919ec158ff58e6505dcf7..00e4e6907804c7a460e60d960b4aa94ca23b4886 100644 --- a/python/paddle/v2/fluid/layers/math_op_patch.py +++ b/python/paddle/v2/fluid/layers/math_op_patch.py @@ -117,6 +117,7 @@ def monkey_patch_variable(): tmp_name = unique_tmp_name() out = self.block.create_var(name=tmp_name, dtype=lhs_dtype) + self.block.append_op( type=op_type, inputs={'X': [self], diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py index 04b3113690fde072ab74893508298b920ab9599e..ec4c2d2721c27127625f0c7d9fee8155675ebd8d 100644 --- a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py +++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py @@ -99,7 +99,7 @@ elif training_role == "TRAINER": exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): for data in train_reader(): - avg_cost_np = exe.run(fluid.default_main_program(), + avg_cost_np = exe.run(t.get_trainer_program(), feed=feeder.feed(data), fetch_list=[avg_cost]) print("avg_cost_np", avg_cost_np) diff --git a/python/paddle/v2/fluid/tests/test_cpp_reader.py b/python/paddle/v2/fluid/tests/test_cpp_reader.py index 66d6c28ef7d32c33e0f4c010c876dad29a3a0eb6..8d4f454611c1e8a62b54c937a217091a0150085c 100644 --- a/python/paddle/v2/fluid/tests/test_cpp_reader.py +++ b/python/paddle/v2/fluid/tests/test_cpp_reader.py @@ -64,9 +64,7 @@ exe = fluid.Executor(place) [res1, res2] = exe.run(prog, fetch_list=[out1, out2]) -test_pass = res1.shape == (10, 2) and res2.shape == (10, 1) - -if not test_pass: +if not (res1.shape == (10, 2) and res2.shape == (10, 1)): exit(1) exit(0) diff --git a/python/paddle/v2/fluid/tests/test_detection.py b/python/paddle/v2/fluid/tests/test_detection.py new file mode 100644 index 0000000000000000000000000000000000000000..9defc5b2ccb30f27a78874711a27545811b60bb5 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_detection.py @@ -0,0 +1,135 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.layers.detection as detection +from paddle.v2.fluid.framework import Program, program_guard +import unittest +import numpy as np + + +class TestBook(unittest.TestCase): + def test_detection_output(self): + program = Program() + with program_guard(program): + pb = layers.data( + name='prior_box', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + pbv = layers.data( + name='prior_box_var', + shape=[10, 4], + append_batch_size=False, + dtype='float32') + loc = layers.data( + name='target_box', + shape=[20, 4], + append_batch_size=False, + dtype='float32') + scores = layers.data( + name='scores', + shape=[2, 20, 10], + append_batch_size=False, + dtype='float32') + out = layers.detection_output( + scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv) + self.assertIsNotNone(out) + print(str(program)) + + +class TestPriorBox(unittest.TestCase): + def test_prior_box(self): + self.check_prior_box(use_cuda=False) + self.check_prior_box(use_cuda=True) + + def prior_box_output(self, data_shape): + images = fluid.layers.data( + name='pixel', shape=data_shape, dtype='float32') + conv1 = fluid.layers.conv2d( + input=images, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv2 = fluid.layers.conv2d( + input=conv1, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv3 = fluid.layers.conv2d( + input=conv2, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv4 = fluid.layers.conv2d( + input=conv3, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv5 = fluid.layers.conv2d( + input=conv4, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + + box, var = detection.prior_box( + inputs=[conv1, conv2, conv3, conv4, conv5, conv5], + image=images, + min_ratio=20, + max_ratio=90, + # steps=[8, 16, 32, 64, 100, 300], + aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], + base_size=300, + offset=0.5, + flip=True, + clip=True) + return box, var + + def check_prior_box(self, use_cuda): + if use_cuda: # prior_box only support CPU. + return + + data_shape = [3, 224, 224] + box, var = self.prior_box_output(data_shape) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + batch = [4] # batch is not used in the prior_box. 
+ + assert box.shape[1] == 4 + assert var.shape[1] == 4 + assert box.shape == var.shape + assert len(box.shape) == 2 + + x = np.random.random(batch + data_shape).astype("float32") + tensor_x = core.LoDTensor() + tensor_x.set(x, place) + boxes, vars = exe.run(fluid.default_main_program(), + feed={'pixel': tensor_x}, + fetch_list=[box, var]) + assert vars.shape == var.shape + assert boxes.shape == box.shape + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_multiclass_nms_op.py b/python/paddle/v2/fluid/tests/test_multiclass_nms_op.py index 3b80d2359b083d30f9a5a7b8cc18aaf1ca5146c1..529223cf40dac646d295941a53455d892274dca3 100644 --- a/python/paddle/v2/fluid/tests/test_multiclass_nms_op.py +++ b/python/paddle/v2/fluid/tests/test_multiclass_nms_op.py @@ -137,7 +137,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, det_outs = [] lod = [0] for n in range(batch_size): - nmsed_outs, nmsed_num = multiclass_nms(boxes, scores[n], background, + nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background, score_threshold, nms_threshold, nms_top_k, keep_top_k) lod.append(lod[-1] + nmsed_num) @@ -145,7 +145,7 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, for c, indices in nmsed_outs.iteritems(): for idx in indices: - xmin, ymin, xmax, ymax = boxes[idx][:] + xmin, ymin, xmax, ymax = boxes[n][idx][:] det_outs.append([c, scores[n][c][idx], xmin, ymin, xmax, ymax]) return det_outs, lod @@ -179,9 +179,9 @@ class TestMulticlassNMSOp(OpTest): scores = np.reshape(scores, (N, M, C)) scores = np.transpose(scores, (0, 2, 1)) - boxes = np.random.random((M, BOX_SIZE)).astype('float32') - boxes[:, 0:2] = boxes[:, 0:2] * 0.5 - boxes[:, 2:4] = boxes[:, 2:4] * 0.5 + 0.5 + boxes = np.random.random((N, M, BOX_SIZE)).astype('float32') + boxes[:, :, 0:2] = boxes[:, :, 0:2] * 0.5 + boxes[:, :, 2:4] = boxes[:, :, 2:4] * 0.5 + 0.5 nmsed_outs, lod = batched_multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, diff --git a/python/paddle/v2/fluid/tests/test_prior_boxes.py b/python/paddle/v2/fluid/tests/test_prior_boxes.py deleted file mode 100644 index 74d292020c620f45c4b4a15a73c20cd5d59da4bf..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/test_prior_boxes.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function -import numpy as np -import paddle.v2.fluid as fluid -import paddle.v2.fluid.layers.detection as detection -import paddle.v2.fluid.core as core -import unittest - - -def prior_box_output(data_shape): - images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') - conv1 = fluid.layers.conv2d( - input=images, num_filters=3, filter_size=3, stride=2, use_cudnn=False) - conv2 = fluid.layers.conv2d( - input=conv1, num_filters=3, filter_size=3, stride=2, use_cudnn=False) - conv3 = fluid.layers.conv2d( - input=conv2, num_filters=3, filter_size=3, stride=2, use_cudnn=False) - conv4 = fluid.layers.conv2d( - input=conv3, num_filters=3, filter_size=3, stride=2, use_cudnn=False) - conv5 = fluid.layers.conv2d( - input=conv4, num_filters=3, filter_size=3, stride=2, use_cudnn=False) - - box, var = detection.prior_box( - inputs=[conv1, conv2, conv3, conv4, conv5, conv5], - image=images, - min_ratio=20, - max_ratio=90, - # steps=[8, 16, 32, 64, 100, 300], - aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], - base_size=300, - offset=0.5, - flip=True, - clip=True) - return box, var - - -def main(use_cuda): - if use_cuda: # prior_box only support CPU. - return - - data_shape = [3, 224, 224] - box, var = prior_box_output(data_shape) - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - batch = [4] # batch is not used in the prior_box. - - assert box.shape[1] == 4 - assert var.shape[1] == 4 - assert box.shape == var.shape - assert len(box.shape) == 2 - - for _ in range(1): - x = np.random.random(batch + data_shape).astype("float32") - tensor_x = core.LoDTensor() - tensor_x.set(x, place) - boxes, vars = exe.run(fluid.default_main_program(), - feed={'pixel': tensor_x}, - fetch_list=[box, var]) - assert vars.shape == var.shape - assert boxes.shape == box.shape - - -class TestFitALine(unittest.TestCase): - def test_cpu(self): - main(use_cuda=False) - - def test_cuda(self): - main(use_cuda=True) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_sequence_expand.py b/python/paddle/v2/fluid/tests/test_sequence_expand.py index 6fc045125fabf6192abc362b6b3d85623308b325..0d37751de48975211abd19e5cfa5ccfe6421d51a 100644 --- a/python/paddle/v2/fluid/tests/test_sequence_expand.py +++ b/python/paddle/v2/fluid/tests/test_sequence_expand.py @@ -73,5 +73,20 @@ class TestSequenceExpandCase3(TestSequenceExpand): self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} +class TestSequenceExpandCase4(TestSequenceExpand): + def set_data(self): + x_data = np.array( + [0.1, 0.3, 0.2, 0.15, 0.25, 0.2, 0.15, 0.25, 0.1, 0.3]).reshape( + [2, 5]).astype('float32') + x_lod = [[ + 0, + 1, + 2, + ]] + y_data = np.random.uniform(0.1, 1, [2, 1]).astype('float32') + y_lod = [[0, 1, 2], [0, 1, 2]] + self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_split_op.py b/python/paddle/v2/fluid/tests/test_split_op.py index b80b64c41be4a45e9d3725297a526e93b399f00d..50347d2df48e3f163054d627fd4624c330690c67 100644 --- a/python/paddle/v2/fluid/tests/test_split_op.py +++ b/python/paddle/v2/fluid/tests/test_split_op.py @@ -20,11 +20,11 @@ from op_test import OpTest class TestSplitOp(OpTest): def setUp(self): self.op_type = "split" - axis = 0 - x = np.random.random((4, 2, 5)).astype('float32') - out = np.split(x, [1, 3], axis) + axis = 1 + x = np.random.random((4, 
5, 6)).astype('float32') + out = np.split(x, [2, 3], axis) self.inputs = {'X': x} - self.attrs = {'axis': axis, 'sections': [1, 2, 1]} + self.attrs = {'axis': axis, 'sections': [2, 1, 2]} self.outputs = {'Out': [('out%d' % i, out[i]) \ for i in xrange(len(out))]} diff --git a/python/paddle/v2/fluid/tests/test_target_assign_op.py b/python/paddle/v2/fluid/tests/test_target_assign_op.py index 8a1155c6217401b1b85e3c0bdc47f438f482bcbb..ceda61ff5523d508747663f89af9d3bc23000981 100755 --- a/python/paddle/v2/fluid/tests/test_target_assign_op.py +++ b/python/paddle/v2/fluid/tests/test_target_assign_op.py @@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod): def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, - neg_lod, background_label): + neg_lod, mismatch_value): batch_size, num_prior = match_indices.shape # init target bbox @@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, trg_box_wt = np.zeros((batch_size, num_prior, 1)).astype('float32') # init target label trg_label = np.ones((batch_size, num_prior, 1)).astype('int32') - trg_label = trg_label * background_label + trg_label = trg_label * mismatch_value # init weight for target label trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32') @@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, # target bbox for v, c in zip(col_val + gt_start, col_ids[0].tolist()): trg_box[i][c][:] = encoded_box[v][c][:] - # weight for target bbox trg_box_wt[i][col_ids] = 1.0 trg_label[i][col_ids] = gt_label[col_val + gt_start] - trg_label_wt[i][col_ids] = 1.0 # set target label weight to 1.0 for the negative samples - neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]] - trg_label_wt[i][neg_ids] = 1.0 + if neg_indices is not None: + neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]] + trg_label_wt[i][neg_ids] = 1.0 return trg_box, trg_box_wt, trg_label, trg_label_wt -class TestTargetAssginOp(OpTest): +class TestTargetAssginFloatType(OpTest): def setUp(self): self.op_type = "target_assign" + num_prior = 120 + num_class = 21 + gt_lod = [0, 5, 11, 23] + neg_lod = [0, 4, 7, 13] + mismatch_value = 0 + batch_size = len(gt_lod) - 1 + num_gt = gt_lod[-1] + + encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') + gt_label = np.random.randint( + num_class, size=(num_gt, 1)).astype('int32') + + match_indices, neg_indices = gen_match_and_neg_indices(num_prior, + gt_lod, neg_lod) + out, out_wt, _, _ = target_assign(encoded_box, gt_label, match_indices, + neg_indices, gt_lod, neg_lod, + mismatch_value) + + # assign regression targets + x = encoded_box + self.inputs = { + 'X': (x, [gt_lod]), + 'MatchIndices': match_indices, + } + self.attrs = {'mismatch_value': mismatch_value} + self.outputs = { + 'Out': out, + 'OutWeight': out_wt, + } + + def test_check_output(self): + self.check_output() + + +class TestTargetAssginIntType(OpTest): + def setUp(self): + self.op_type = "target_assign" num_prior = 120 num_class = 21 gt_lod = [0, 5, 11, 23] neg_lod = [0, 4, 7, 13] + mismatch_value = 0 batch_size = len(gt_lod) - 1 num_gt = gt_lod[-1] - background_label = 0 encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') gt_label = np.random.randint( num_class, size=(num_gt, 1)).astype('int32') + match_indices, neg_indices = gen_match_and_neg_indices(num_prior, gt_lod, neg_lod) - trg_box, trg_box_wt, trg_label, trg_label_wt = target_assign( - encoded_box, gt_label, match_indices, neg_indices, gt_lod, neg_lod, - 
background_label)
+        _, _, out, out_wt, = target_assign(encoded_box, gt_label, match_indices,
+                                           neg_indices, gt_lod, neg_lod,
+                                           mismatch_value)
+
+        # assign classification targets
+        x = np.reshape(gt_label, (num_gt, 1, 1))
         self.inputs = {
-            'EncodedGTBBox': (encoded_box, [gt_lod]),
-            'GTScoreLabel': (gt_label, [gt_lod]),
-            'MatchIndices': (match_indices),
+            'X': (x, [gt_lod]),
+            'MatchIndices': match_indices,
             'NegIndices': (neg_indices, [neg_lod]),
         }
-        self.attrs = {'background_label': background_label}
+        self.attrs = {'mismatch_value': mismatch_value}
         self.outputs = {
-            'PredBBoxLabel': (trg_box),
-            'PredBBoxWeight': (trg_box_wt),
-            'PredScoreLabel': (trg_label),
-            'PredScoreWeight': (trg_label_wt),
+            'Out': out,
+            'OutWeight': out_wt,
         }

     def test_check_output(self):
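
As a reading aid for the reworked target_assign tests above, the following small, self-contained numpy sketch (with made-up labels and match indices, and ignoring the LoD offsets handled by the real test helper) illustrates the mismatch_value semantics: unmatched priors keep mismatch_value, while matched priors copy the label of the ground-truth box they were matched to.

import numpy as np

mismatch_value = 0
gt_label = np.array([[7], [3]], dtype='int32')             # labels of two ground-truth boxes
match_indices = np.array([[0, -1, 1, -1]], dtype='int32')  # one image, four priors; -1 = unmatched

# Start every prior at mismatch_value, then overwrite the matched ones.
trg_label = np.full((1, 4, 1), mismatch_value, dtype='int32')
for prior, gt_idx in enumerate(match_indices[0]):
    if gt_idx >= 0:
        trg_label[0, prior, 0] = gt_label[gt_idx, 0]

print(trg_label[:, :, 0])  # [[7 0 3 0]]
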