From 97de98cd0a1240eacb573e8d117d0e4b928d82b0 Mon Sep 17 00:00:00 2001 From: frankwhzhang Date: Fri, 7 Dec 2018 16:16:18 +0800 Subject: [PATCH] update bpr_loss op code, test=develop --- paddle/fluid/API.spec | 15 ++- paddle/fluid/operators/bpr_loss_op.cc | 35 ++++--- paddle/fluid/operators/bpr_loss_op.h | 92 +++++++------------ python/paddle/fluid/layers/nn.py | 2 +- .../fluid/tests/unittests/test_bpr_loss_op.py | 2 +- 5 files changed, 65 insertions(+), 81 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index e273a852a9..9a90ad4e93 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -32,6 +32,13 @@ paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.c paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None) paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None) +paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None) +paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None) +paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None) +paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None) +paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'debug'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)) @@ -70,7 +77,7 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'] paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None)) paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) -paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, 
None, None, 'NCHW', False, None, None, None, False, False)) +paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False)) paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None)) @@ -176,7 +183,7 @@ paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)) -paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name'], varargs=None, keywords=None, defaults=(-100, None)) paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)) @@ -188,6 +195,9 @@ paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=Non paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)) paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)) +paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, 
keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -292,6 +302,7 @@ paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'i paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc index 3e6445dbc2..41f2969e6c 100644 --- a/paddle/fluid/operators/bpr_loss_op.cc +++ b/paddle/fluid/operators/bpr_loss_op.cc @@ -23,19 +23,18 @@ class BprLossOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label_Pos"), - "Input(Label_Pos) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("LabelPos"), + "Input(LabelPos) should be not null."); PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null."); auto x_dims = ctx->GetInputDim("X"); - auto label_Pos_dims = ctx->GetInputDim("Label_Pos"); + auto label_Pos_dims = ctx->GetInputDim("LabelPos"); int rank = x_dims.size(); - PADDLE_ENFORCE_EQ( - rank, label_Pos_dims.size(), - "Input(X) and Input(Label_Pos) shall have the same rank."); + PADDLE_ENFORCE_EQ(rank, label_Pos_dims.size(), + "Input(X) and Input(LabelPos) shall have the same rank."); PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), framework::slice_ddim(label_Pos_dims, 0, rank - 1), - "Input(X) and Input(Label_Pos) shall have the same shape " + "Input(X) and Input(LabelPos) shall have the same shape " "except the last dimension."); auto y_dims = x_dims; @@ -61,25 +60,25 @@ class BprLossGradientOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null."); - PADDLE_ENFORCE(ctx->HasInput("Label_Pos"), - "Input(Label_Pos) should be not null."); + PADDLE_ENFORCE(ctx->HasInput("LabelPos"), + "Input(LabelPos) should be not null."); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), "Input(Y@GRAD) shoudl be not null."); 
); PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), "Output(X@GRAD) should be not null."); auto x_dims = ctx->GetInputDim("X"); - auto label_pos_dims = ctx->GetInputDim("Label_Pos"); + auto label_pos_dims = ctx->GetInputDim("LabelPos"); auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y")); int rank = x_dims.size(); PADDLE_ENFORCE_EQ(dy_dims.size(), rank, "Input(Y@Grad) and Input(X) should have the same rank."); PADDLE_ENFORCE_EQ( label_pos_dims.size(), rank, - "Input(Label_Pos) and Input(X) should have the same rank."); + "Input(LabelPos) and Input(X) should have the same rank."); PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), framework::slice_ddim(label_pos_dims, 0, rank - 1), - "The Input(X) and Input(Label_Pos) should have the same " + "The Input(X) and Input(LabelPos) should have the same " "shape except the last dimension."); PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1), framework::slice_ddim(dy_dims, 0, rank - 1), @@ -88,7 +87,7 @@ class BprLossGradientOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1, "The last dimension of Input(Y@Grad) should be 1."); PADDLE_ENFORCE_EQ(label_pos_dims[rank - 1], 1, - " the last dimension of Input(Label_Pos) should be 1."); + " the last dimension of Input(LabelPos) should be 1."); ctx->SetOutputDim(framework::GradVarName("X"), x_dims); ctx->ShareLoD("X", framework::GradVarName("X")); } @@ -112,7 +111,7 @@ class BprLossOpMaker : public framework::OpProtoAndCheckerMaker { "size is equal to the number of classes. This input is a " "real number."); AddInput( - "Label_Pos", + "LabelPos", "(Tensor), the tensor which represents the ground truth. It has the " "same shape with 'X' except the last dimension. the last dimension " "size is 1."); @@ -121,14 +120,14 @@ class BprLossOpMaker : public framework::OpProtoAndCheckerMaker { "with 'X' except that the last dimension size is 1. It " "represents the sequence bpr loss."); AddComment(R"DOC( -BprLoss Operator. +Bayesian Personalized Ranking Loss Operator. -This operator belongs to pairwise ranking loss. Label_pos is the desired item. -The loss at a given point in one seesion is defined as: +This operator belongs to pairwise ranking loss. LabelPos is the desired item. +The loss at a given point in one session is defined as: $Y[i] = -\frac{1}{N_{i}} * \sum_{j=0}^{N_{i}}\log(\sigma(X[i, Label[i]]-X[i, j]))$ Learn more details by reading paper <session-based recommendations with recurrent
+neural networks>(https://arxiv.org/abs/1511.06939) )DOC"); } diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index 4103686de7..ea817bb239 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -39,22 +39,22 @@ class BprLossOpKernel : public framework::OpKernel<T> { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input<Tensor>("X"); - auto* labels_Pos = ctx.Input<Tensor>("Label_Pos"); + auto* label_pos = ctx.Input<Tensor>("LabelPos"); auto* y = ctx.Output<Tensor>("Y"); y->mutable_data<T>(ctx.GetPlace()); int rank = x->dims().size(); Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); - Tensor labels_Pos_2d = framework::ReshapeToMatrix(*labels_Pos, rank - 1); + Tensor labels_Pos_2d = framework::ReshapeToMatrix(*label_pos, rank - 1); Tensor y_2d = framework::ReshapeToMatrix(*y, rank - 1); - const framework::Tensor* prob = &x_2d; + const framework::Tensor* logits = &x_2d; const framework::Tensor* labels_pos = &labels_Pos_2d; framework::Tensor* out = &y_2d; - const int step_size = prob->dims()[0]; - const int class_num = prob->dims()[1]; - const T* prob_data = prob->data<T>(); + const int step_size = logits->dims()[0]; + const int class_num = logits->dims()[1]; + const T* logits_data = logits->data<T>(); T* loss_data = out->data<T>(); const int64_t* label_pos_data = labels_pos->data<int64_t>(); @@ -68,73 +68,47 @@ class BprLossOpKernel : public framework::OpKernel<T> { if (j == lbl_pos) continue; int index_neg = i * class_num + j; sum += TolerableValue<T>()(-std::log( - 1.0f + TolerableValue<T>()( - std::exp(prob_data[index_neg] - prob_data[index_pos])))); + 1.0f + TolerableValue<T>()(std::exp(logits_data[index_neg] - + logits_data[index_pos])))); } loss_data[i] = -sum / (class_num - 1); } } }; -template <typename T> -class XeGradFunctor { - public: - XeGradFunctor(T* dx, - const T* dy, // NOLINT - const T* x, // NOLINT - const int64_t* label_pos, // NOLINT - size_t num_classes) - : dx_(dx), - dy_(dy), - x_(x), - label_pos_(label_pos), - num_classes_(num_classes) {} - - HOSTDEVICE void operator()(size_t sample_id) { - for (size_t x_offset = sample_id * num_classes_; - x_offset < (sample_id + 1) * num_classes_; ++x_offset) { - dx_[x_offset] = static_cast<T>(0); - } - auto p_index = sample_id * num_classes_ + label_pos_[sample_id]; - for (size_t ni = 0; ni < num_classes_; ni++) { - if (label_pos_[sample_id] == ni) continue; - auto n_index = sample_id * num_classes_ + ni; - auto grad_ = - -dy_[sample_id] / - ((num_classes_ - 1) * - (1.0f + TolerableValue<T>()(std::exp(x_[p_index] - x_[n_index])))); - dx_[p_index] += grad_; - dx_[n_index] -= grad_; - } - } - - private: - T* dx_; - const T* dy_; - const T* x_; - const int64_t* label_pos_; - size_t num_classes_; -}; - template <typename DeviceContext, typename T> class BprLossGradientOpKernel : public framework::OpKernel<T> { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input<Tensor>("X"); auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y")); - auto* label_pos = ctx.Input<Tensor>("Label_Pos"); + auto* label_pos = ctx.Input<Tensor>("LabelPos"); auto* dx = ctx.Output<Tensor>(framework::GradVarName("X")); - T* dx_data = dx->mutable_data<T>(ctx.GetPlace()); - int rank = x->dims().size(); - int64_t class_num = x->dims()[rank - 1]; - XeGradFunctor<T> functor(dx_data, dy->data<T>(), x->data<T>(), - label_pos->data<int64_t>(), - static_cast<size_t>(class_num)); - platform::ForRange<DeviceContext> for_range( - ctx.template device_context<DeviceContext>(), - static_cast<size_t>(dy->numel())); - for_range(functor); + const int step_size = x->dims()[0]; + const int num_classes_ = x->dims()[1]; + T* dx_ =
dx->mutable_data<T>(ctx.GetPlace()); + const T* dy_ = dy->data<T>(); + const T* x_ = x->data<T>(); + const int64_t* label_pos_ = label_pos->data<int64_t>(); + + for (size_t sample_id = 0; sample_id < step_size; sample_id++) { + for (size_t x_offset = sample_id * num_classes_; + x_offset < (sample_id + 1) * num_classes_; x_offset++) { + dx_[x_offset] = static_cast<T>(0); + } + auto p_index = sample_id * num_classes_ + label_pos_[sample_id]; + for (size_t ni = 0; ni < num_classes_; ni++) { + if (label_pos_[sample_id] == ni) continue; + auto n_index = sample_id * num_classes_ + ni; + auto grad_ = + -dy_[sample_id] / + ((num_classes_ - 1) * + (1.0f + TolerableValue<T>()(std::exp(x_[p_index] - x_[n_index])))); + dx_[p_index] += grad_; + dx_[n_index] -= grad_; + } + } } }; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 06d7e429ae..3ba1883999 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1356,7 +1356,7 @@ def bpr_loss(input, label_pos): helper.append_op( type='bpr_loss', inputs={'X': [input], - 'Label_Pos': [label_pos]}, + 'LabelPos': [label_pos]}, outputs={'Y': [out]}) return out diff --git a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py index 2af6461aed..d137f4a6fb 100644 --- a/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_bpr_loss_op.py @@ -39,7 +39,7 @@ class TestBprLossOp1(OpTest): sum += (-np.log(1.0 + np.exp(X[i][j] - X[i][label_pos[i][0]]))) bpr_loss_result.append(-sum / (class_num - 1)) bpr_loss = np.asmatrix([[x] for x in bpr_loss_result], dtype="float64") - self.inputs = {"X": X, "Label_Pos": label_pos} + self.inputs = {"X": X, "LabelPos": label_pos} self.outputs = {"Y": bpr_loss} def test_check_output(self): -- GitLab
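
For reference, the per-sample loss that the forward kernel (and test_bpr_loss_op.py) computes can be sketched in NumPy. This is a minimal illustration, not part of the patch; the helper name bpr_loss_forward and the sample shapes are made up.

```python
import numpy as np


def bpr_loss_forward(x, label_pos):
    # x: [N, class_num] item scores, label_pos: [N, 1] index of the positive item.
    batch_size, class_num = x.shape
    loss = np.zeros((batch_size, 1), dtype=x.dtype)
    for i in range(batch_size):
        pos = int(label_pos[i][0])
        total = 0.0
        for j in range(class_num):
            if j == pos:
                continue
            # log(sigmoid(x_pos - x_neg)) == -log(1 + exp(x_neg - x_pos))
            total += -np.log(1.0 + np.exp(x[i][j] - x[i][pos]))
        loss[i][0] = -total / (class_num - 1)
    return loss


x = np.random.uniform(0.1, 1.0, [3, 5]).astype("float64")
label_pos = np.random.randint(0, 5, [3, 1]).astype("int64")
print(bpr_loss_forward(x, label_pos))
```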
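The backward pass is now inlined in BprLossGradientOpKernel instead of going through the removed XeGradFunctor: for each sample it zeroes the dx row and then splits the pairwise gradient between the positive item and each negative item. A NumPy sketch of that loop, again with illustrative names:

```python
import numpy as np


def bpr_loss_backward(x, label_pos, dy):
    # dy: [N, 1] upstream gradient of the per-sample loss Y.
    batch_size, class_num = x.shape
    dx = np.zeros_like(x)
    for i in range(batch_size):
        pos = int(label_pos[i][0])
        for j in range(class_num):
            if j == pos:
                continue
            # Pairwise term: d/dx of -log(sigmoid(x_pos - x_neg)) / (class_num - 1),
            # scaled by the upstream gradient dy.
            grad = -dy[i][0] / ((class_num - 1) *
                                (1.0 + np.exp(x[i][pos] - x[i][j])))
            dx[i][pos] += grad
            dx[i][j] -= grad
    return dx
```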
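On the Python side only the internal input key changes ('Label_Pos' -> 'LabelPos'); the public signature stays bpr_loss(input, label_pos). A minimal usage sketch, assuming bpr_loss is exposed from fluid.layers as defined in the patched nn.py (layer names and the class count here are illustrative):

```python
import paddle.fluid as fluid

class_num = 5
# Scores over the candidate items for each step, and the index of the positive item.
scores = fluid.layers.data(name="item_scores", shape=[class_num], dtype="float32")
next_item = fluid.layers.data(name="next_item", shape=[1], dtype="int64")

cost = fluid.layers.bpr_loss(input=scores, label_pos=next_item)  # shape [N, 1]
avg_cost = fluid.layers.mean(cost)
```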