Commit 97de98cd authored by frankwhzhang

update bpr_loss op code, test=develop

Parent b51df398
@@ -32,6 +32,13 @@ paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.c
paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
paddle.fluid.create_lod_tensor ArgSpec(args=['data', 'recursive_seq_lens', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.create_random_int_lodtensor ArgSpec(args=['recursive_seq_lens', 'base_shape', 'place', 'low', 'high'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.__init__ ArgSpec(args=['self', 'proto_file'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_batch_size ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_dense_slots ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DataFeedDesc.set_use_slots ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.AsyncExecutor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.AsyncExecutor.run ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'debug'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.io.save_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.save_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
@@ -70,7 +77,7 @@ paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name']
paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None))
paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False))
paddle.fluid.layers.batch_norm ArgSpec(args=['input', 'act', 'is_test', 'momentum', 'epsilon', 'param_attr', 'bias_attr', 'data_layout', 'in_place', 'name', 'moving_mean_name', 'moving_variance_name', 'do_model_average_for_mean_and_var', 'fuse_with_relu', 'use_global_stats'], varargs=None, keywords=None, defaults=(None, False, 0.9, 1e-05, None, None, 'NCHW', False, None, None, None, False, False, False))
paddle.fluid.layers.beam_search_decode ArgSpec(args=['ids', 'scores', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.conv2d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
paddle.fluid.layers.conv3d_transpose ArgSpec(args=['input', 'num_filters', 'output_size', 'filter_size', 'padding', 'stride', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, None, 0, 1, 1, None, None, None, True, None, None))
@@ -176,7 +183,7 @@ paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None,
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name'], varargs=None, keywords=None, defaults=(-100, None))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
@@ -188,6 +195,9 @@ paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=Non
paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.layers.merge_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.get_tensor_from_selected_rows ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -292,6 +302,7 @@ paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'i
paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None))
paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None))
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
......
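Each API.spec entry above records the public signature of a fluid API: the tuple in `defaults` pairs right-aligned with the names in `args`. As a hedged illustration (the body is elided; only the signature is implied by the spec), the `paddle.fluid.layers.mean` entry corresponds to:

```python
# Sketch: how an API.spec ArgSpec line maps onto a Python signature.
# The entry
#   paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None,
#                                    keywords=None, defaults=(None,))
# pairs the single default with the trailing arg, implying:
def mean(x, name=None):
    ...  # implementation lives in paddle.fluid.layers; not part of the spec
```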
@@ -23,19 +23,18 @@ class BprLossOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label_Pos"),
"Input(Label_Pos) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("LabelPos"),
"Input(LabelPos) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) should be not null.");
auto x_dims = ctx->GetInputDim("X");
auto label_Pos_dims = ctx->GetInputDim("Label_Pos");
auto label_Pos_dims = ctx->GetInputDim("LabelPos");
int rank = x_dims.size();
PADDLE_ENFORCE_EQ(
rank, label_Pos_dims.size(),
"Input(X) and Input(Label_Pos) shall have the same rank.");
PADDLE_ENFORCE_EQ(rank, label_Pos_dims.size(),
"Input(X) and Input(LabelPos) shall have the same rank.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_Pos_dims, 0, rank - 1),
"Input(X) and Input(Label_Pos) shall have the same shape "
"Input(X) and Input(LabelPos) shall have the same shape "
"except the last dimension.");
auto y_dims = x_dims;
@@ -61,25 +60,25 @@ class BprLossGradientOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Label_Pos"),
"Input(Label_Pos) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("LabelPos"),
"Input(LabelPos) should be not null.");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")),
"Input(Y@GRAD) shoudl be not null.");
PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
"Output(X@GRAD) should be not null.");
auto x_dims = ctx->GetInputDim("X");
auto label_pos_dims = ctx->GetInputDim("Label_Pos");
auto label_pos_dims = ctx->GetInputDim("LabelPos");
auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
int rank = x_dims.size();
PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
"Input(Y@Grad) and Input(X) should have the same rank.");
PADDLE_ENFORCE_EQ(
label_pos_dims.size(), rank,
"Input(Label_Pos) and Input(X) should have the same rank.");
"Input(LabelPos) and Input(X) should have the same rank.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(label_pos_dims, 0, rank - 1),
"The Input(X) and Input(Label_Pos) should have the same "
"The Input(X) and Input(LabelPos) should have the same "
"shape except the last dimension.");
PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
framework::slice_ddim(dy_dims, 0, rank - 1),
@@ -88,7 +87,7 @@ class BprLossGradientOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
"The last dimension of Input(Y@Grad) should be 1.");
PADDLE_ENFORCE_EQ(label_pos_dims[rank - 1], 1,
" the last dimension of Input(Label_Pos) should be 1.");
" the last dimension of Input(LabelPos) should be 1.");
ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
ctx->ShareLoD("X", framework::GradVarName("X"));
}
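Taken together, the two InferShape methods above pin down a simple shape contract. A minimal numpy sketch of that contract, assuming the common rank-2 case (N samples, C candidate items):

```python
import numpy as np

def check_bpr_shapes(x, label_pos, dy=None):
    # X and LabelPos must share every dimension except the last,
    # and the last dimension of LabelPos (and of Y / Y@GRAD) must be 1.
    assert x.ndim == label_pos.ndim
    assert x.shape[:-1] == label_pos.shape[:-1]
    assert label_pos.shape[-1] == 1
    if dy is not None:
        assert dy.ndim == x.ndim
        assert dy.shape[:-1] == x.shape[:-1]
        assert dy.shape[-1] == 1
```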
@@ -112,7 +111,7 @@ class BprLossOpMaker : public framework::OpProtoAndCheckerMaker {
"size is equal to the number of classes. This input is a "
"real number.");
AddInput(
"Label_Pos",
"LabelPos",
"(Tensor), the tensor which represents the ground truth. It has the "
"same shape with 'X' except the last dimension. the last dimension "
"size is 1.");
@@ -121,14 +120,14 @@ class BprLossOpMaker : public framework::OpProtoAndCheckerMaker {
"with 'X' except that the last dimension size is 1. It "
"represents the sequence bpr loss.");
AddComment(R"DOC(
BprLoss Operator.
Bayesian Personalized Ranking Loss Operator.
This operator belongs to pairwise ranking loss. Label_pos is the desired item.
The loss at a given point in one seesion is defined as:
This operator belongs to pairwise ranking loss. LabelPos is the desired item.
The loss at a given point in one session is defined as:
$Y[i] = -\frac{1}{N_{i}} * \sum_{j=0}^{N_{i}}\log(\sigma(X[i, Label[i]]-X[i, j]))$
Learn more details by reading paper <session-based recommendations with recurrent
neural networks>.
neural networks>(https://arxiv.org/abs/1511.06939)
)DOC");
}
......
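The DOC formula can be checked against a plain numpy reference. Below is a sketch of the forward computation, mirroring the kernel in bpr_loss_op.h and the unit test's reference, assuming X holds [N, C] scores and LabelPos holds the [N, 1] int64 index of each sample's positive item:

```python
import numpy as np

def bpr_loss_ref(x, label_pos):
    # Y[i] = -1/(C-1) * sum_{j != pos} log(sigmoid(x_pos - x_j)),
    # using log(sigmoid(a - b)) == -log(1 + exp(b - a)).
    n, c = x.shape
    y = np.zeros((n, 1), dtype=x.dtype)
    for i in range(n):
        pos = int(label_pos[i][0])
        s = 0.0
        for j in range(c):
            if j == pos:
                continue
            s += -np.log(1.0 + np.exp(x[i][j] - x[i][pos]))
        y[i][0] = -s / (c - 1)
    return y
```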
@@ -39,22 +39,22 @@ class BprLossOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* labels_Pos = ctx.Input<Tensor>("Label_Pos");
auto* label_pos = ctx.Input<Tensor>("LabelPos");
auto* y = ctx.Output<Tensor>("Y");
y->mutable_data<T>(ctx.GetPlace());
int rank = x->dims().size();
Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
Tensor labels_Pos_2d = framework::ReshapeToMatrix(*labels_Pos, rank - 1);
Tensor labels_Pos_2d = framework::ReshapeToMatrix(*label_pos, rank - 1);
Tensor y_2d = framework::ReshapeToMatrix(*y, rank - 1);
const framework::Tensor* prob = &x_2d;
const framework::Tensor* logits = &x_2d;
const framework::Tensor* labels_pos = &labels_Pos_2d;
framework::Tensor* out = &y_2d;
const int step_size = prob->dims()[0];
const int class_num = prob->dims()[1];
const T* prob_data = prob->data<T>();
const int step_size = logits->dims()[0];
const int class_num = logits->dims()[1];
const T* logits_data = logits->data<T>();
T* loss_data = out->data<T>();
const int64_t* label_pos_data = labels_pos->data<int64_t>();
@@ -68,73 +68,47 @@ class BprLossOpKernel : public framework::OpKernel<T> {
if (j == lbl_pos) continue;
int index_neg = i * class_num + j;
sum += TolerableValue<T>()(-std::log(
1.0f + TolerableValue<T>()(
std::exp(prob_data[index_neg] - prob_data[index_pos]))));
1.0f + TolerableValue<T>()(std::exp(logits_data[index_neg] -
logits_data[index_pos]))));
}
loss_data[i] = -sum / (class_num - 1);
}
}
};
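The kernel wraps both the `std::exp` and the `-std::log` in `TolerableValue<T>()`. Assuming it behaves like the same-named helper used by Paddle's cross-entropy kernel (clamping ±inf to a large finite value so one overflowing `exp` cannot poison the sum), a numpy analogue would be:

```python
import numpy as np

def tolerable(v, appro_inf=1e20):
    # Assumed analogue of TolerableValue<T>: map +/-inf to +/-appro_inf so an
    # overflowing exp() yields a large-but-finite loss term instead of inf.
    return float(np.clip(v, -appro_inf, appro_inf))
```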
template <typename T>
class XeGradFunctor {
public:
XeGradFunctor(T* dx,
const T* dy, // NOLINT
const T* x, // NOLINT
const int64_t* label_pos, // NOLINT
size_t num_classes)
: dx_(dx),
dy_(dy),
x_(x),
label_pos_(label_pos),
num_classes_(num_classes) {}
HOSTDEVICE void operator()(size_t sample_id) {
for (size_t x_offset = sample_id * num_classes_;
x_offset < (sample_id + 1) * num_classes_; ++x_offset) {
dx_[x_offset] = static_cast<T>(0);
}
auto p_index = sample_id * num_classes_ + label_pos_[sample_id];
for (size_t ni = 0; ni < num_classes_; ni++) {
if (label_pos_[sample_id] == ni) continue;
auto n_index = sample_id * num_classes_ + ni;
auto grad_ =
-dy_[sample_id] /
((num_classes_ - 1) *
(1.0f + TolerableValue<T>()(std::exp(x_[p_index] - x_[n_index]))));
dx_[p_index] += grad_;
dx_[n_index] -= grad_;
}
}
private:
T* dx_;
const T* dy_;
const T* x_;
const int64_t* label_pos_;
size_t num_classes_;
};
template <typename DeviceContext, typename T>
class BprLossGradientOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<Tensor>("X");
auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto* label_pos = ctx.Input<Tensor>("Label_Pos");
auto* label_pos = ctx.Input<Tensor>("LabelPos");
auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
int rank = x->dims().size();
int64_t class_num = x->dims()[rank - 1];
XeGradFunctor<T> functor(dx_data, dy->data<T>(), x->data<T>(),
label_pos->data<int64_t>(),
static_cast<size_t>(class_num));
platform::ForRange<DeviceContext> for_range(
ctx.template device_context<DeviceContext>(),
static_cast<size_t>(dy->numel()));
for_range(functor);
const int step_size = x->dims()[0];
const int num_classes_ = x->dims()[1];
T* dx_ = dx->mutable_data<T>(ctx.GetPlace());
const T* dy_ = dy->data<T>();
const T* x_ = x->data<T>();
const int64_t* label_pos_ = label_pos->data<int64_t>();
for (size_t sample_id = 0; sample_id < step_size; sample_id++) {
for (size_t x_offset = sample_id * num_classes_;
x_offset < (sample_id + 1) * num_classes_; x_offset++) {
dx_[x_offset] = static_cast<T>(0);
}
auto p_index = sample_id * num_classes_ + label_pos_[sample_id];
for (size_t ni = 0; ni < num_classes_; ni++) {
if (label_pos_[sample_id] == ni) continue;
auto n_index = sample_id * num_classes_ + ni;
auto grad_ =
-dy_[sample_id] /
((num_classes_ - 1) *
(1.0f + TolerableValue<T>()(std::exp(x_[p_index] - x_[n_index]))));
dx_[p_index] += grad_;
dx_[n_index] -= grad_;
}
}
}
};
......
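The rewritten gradient kernel above inlines what the deleted XeGradFunctor used to compute per sample. A numpy sketch of the same loop, under the [N, C] / [N, 1] shape assumptions used earlier:

```python
import numpy as np

def bpr_loss_grad_ref(x, label_pos, dy):
    # For each sample: g = -dy / ((C-1) * (1 + exp(x_pos - x_neg))),
    # accumulated into the positive column and subtracted from each negative.
    n, c = x.shape
    dx = np.zeros_like(x)
    for i in range(n):
        pos = int(label_pos[i][0])
        for j in range(c):
            if j == pos:
                continue
            g = -dy[i][0] / ((c - 1) * (1.0 + np.exp(x[i][pos] - x[i][j])))
            dx[i][pos] += g
            dx[i][j] -= g
    return dx
```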
@@ -1356,7 +1356,7 @@ def bpr_loss(input, label_pos):
helper.append_op(
type='bpr_loss',
inputs={'X': [input],
'Label_Pos': [label_pos]},
'LabelPos': [label_pos]},
outputs={'Y': [out]})
return out
......
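For callers nothing changes: only the op's internal input name moved from 'Label_Pos' to 'LabelPos'; the Python keyword is still `label_pos`. A hedged usage sketch (variable names and shapes below are illustrative, not from this commit):

```python
import paddle.fluid as fluid

# Scores over 100 candidate items and the positive item's index per sample.
scores = fluid.layers.data(name='item_scores', shape=[100], dtype='float32')
pos_item = fluid.layers.data(name='pos_item', shape=[1], dtype='int64')

cost = fluid.layers.bpr_loss(input=scores, label_pos=pos_item)  # feeds 'LabelPos'
avg_cost = fluid.layers.mean(cost)
```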
@@ -39,7 +39,7 @@ class TestBprLossOp1(OpTest):
sum += (-np.log(1.0 + np.exp(X[i][j] - X[i][label_pos[i][0]])))
bpr_loss_result.append(-sum / (class_num - 1))
bpr_loss = np.asmatrix([[x] for x in bpr_loss_result], dtype="float64")
self.inputs = {"X": X, "Label_Pos": label_pos}
self.inputs = {"X": X, "LabelPos": label_pos}
self.outputs = {"Y": bpr_loss}
def test_check_output(self):
......