diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index ea817bb2391ba9b038cfb58de118fc8c32b7c428..ab68165942a25d3d9c2fd7c84f4d43d8b1d722fa 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -22,7 +22,9 @@ namespace paddle { namespace operators { using Tensor = framework::Tensor; - +/*Todo: + *Find a way to adapt TolerableValue, using blas or eigen. + */ template struct TolerableValue { HOSTDEVICE T operator()(const T& x) const { @@ -86,27 +88,27 @@ class BprLossGradientOpKernel : public framework::OpKernel { auto* dx = ctx.Output(framework::GradVarName("X")); const int step_size = x->dims()[0]; - const int num_classes_ = x->dims()[1]; - T* dx_ = dx->mutable_data(ctx.GetPlace()); - const T* dy_ = dy->data(); - const T* x_ = x->data(); - const int64_t* label_pos_ = label_pos->data(); + const int num_classes = x->dims()[1]; + T* dx_data = dx->mutable_data(ctx.GetPlace()); + const T* dy_data = dy->data(); + const T* x_data = x->data(); + const int64_t* label_pos_data = label_pos->data(); for (size_t sample_id = 0; sample_id < step_size; sample_id++) { - for (size_t x_offset = sample_id * num_classes_; - x_offset < (sample_id + 1) * num_classes_; x_offset++) { - dx_[x_offset] = static_cast(0); + for (size_t x_offset = sample_id * num_classes; + x_offset < (sample_id + 1) * num_classes; x_offset++) { + dx_data[x_offset] = static_cast(0); } - auto p_index = sample_id * num_classes_ + label_pos_[sample_id]; - for (size_t ni = 0; ni < num_classes_; ni++) { - if (label_pos_[sample_id] == ni) continue; - auto n_index = sample_id * num_classes_ + ni; - auto grad_ = - -dy_[sample_id] / - ((num_classes_ - 1) * - (1.0f + TolerableValue()(std::exp(x_[p_index] - x_[n_index])))); - dx_[p_index] += grad_; - dx_[n_index] -= grad_; + auto p_index = sample_id * num_classes + label_pos_data[sample_id]; + for (size_t ni = 0; ni < num_classes; ni++) { + if (label_pos_data[sample_id] == ni) continue; + auto n_index = sample_id * num_classes + ni; + auto grad_ = -dy_data[sample_id] / + ((num_classes - 1) * + (1.0f + TolerableValue()(std::exp(x_data[p_index] - + x_data[n_index])))); + dx_data[p_index] += grad_; + dx_data[n_index] -= grad_; } } }