From 984117458ca019335d4ba8cd111f0895800651aa Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sat, 9 Sep 2017 16:55:19 +0800 Subject: [PATCH] Finish modified huber loss op. --- paddle/operators/modified_huber_loss_op.cc | 26 ++++++--- paddle/operators/modified_huber_loss_op.cu | 49 +++++++++++++++-- paddle/operators/modified_huber_loss_op.h | 52 ++++++------------ .../tests/test_modified_huber_loss_op.py | 55 +++++++++++++++++++ 4 files changed, 134 insertions(+), 48 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_modified_huber_loss_op.py diff --git a/paddle/operators/modified_huber_loss_op.cc b/paddle/operators/modified_huber_loss_op.cc index 631464bc841..631d406fd45 100644 --- a/paddle/operators/modified_huber_loss_op.cc +++ b/paddle/operators/modified_huber_loss_op.cc @@ -45,11 +45,25 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker { ModifiedHuberLossOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", ""); - AddInput("Y", ""); - AddOutput("intermediate_val", "").AsIntermediate(); - AddOutput("Out", ""); - AddComment(""); + AddInput("X", "Input value of ModifiedHuberLossOp."); + AddInput("Y", "Target labels of ModifiedHuberLossOp."); + AddOutput("intermediate_val", + "Variable to save intermediate result which will be reused in " + "backward processing.") + .AsIntermediate(); + AddOutput("Out", "Classification loss for input X."); + AddComment(R"DOC( +Modified huber loss is used in binary classification problem. Dimensions of +input X and target Y are both (N, 1) and so is the dimension of output loss. +Since target Y is not differentiable, calculating gradient for Y is illegal. +The formulation of modified huber loss is: + +L(y, f(x)) = max(0, 1 - yf(x))^2 for yf(x) >= -1, + -4yf(x) otherwise. + +Make sure the values of target label Y are in {0, 1} here. 
The operator will +scale values of Y to {-1, +1} when computing loss and gradients. +)DOC"); } }; @@ -64,7 +78,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { auto* intermediate_val = context.Input("intermediate_val"); auto* out_grad = context.Input(framework::GradVarName("Out")); auto* x_grad = context.Output(framework::GradVarName("X")); - auto* y_grad = context.Output(framework::GradVarName("Y")); PADDLE_ENFORCE_NOT_NULL(x, "Input X must not be null."); PADDLE_ENFORCE_NOT_NULL(y, "Target Y must not be null."); @@ -80,7 +93,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel { "Dimension of Out gradient and X must be the same (N*1)."); if (x_grad) x_grad->Resize(x->dims()); - if (y_grad) y_grad->Resize(y->dims()); } }; diff --git a/paddle/operators/modified_huber_loss_op.cu b/paddle/operators/modified_huber_loss_op.cu index 06c710e0c5e..f8aa5043dd8 100644 --- a/paddle/operators/modified_huber_loss_op.cu +++ b/paddle/operators/modified_huber_loss_op.cu @@ -9,24 +9,61 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#include +#include +#include +#include #include "paddle/framework/op_registry.h" #include "paddle/operators/modified_huber_loss_op.h" +#include "paddle/platform/hostdevice.h" namespace paddle { namespace operators { using Tensor = framework::Tensor; +struct ModifiedHuberLossBackward { + template + HOSTDEVICE void operator()(Tuple t) const { + auto inter_val = thrust::get<1>(t); + auto y_val = thrust::get<2>(t); + auto out_grad = thrust::get<3>(t); + if (inter_val < -1) { + thrust::get<0>(t) = -4 * (2 * y_val - 1) * out_grad; + } else if (inter_val < 1) { + thrust::get<0>(t) = -2 * (1 - inter_val) * (2 * y_val - 1) * out_grad; + } else { + thrust::get<0>(t) = 0; + } + } +}; + template class ModifiedHuberLossGradGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - // auto* in0 = context.Input("X"); - // auto* in1 = context.Input("Y"); - // auto* in2 = context.Input("intermediate_val"); - // auto* in3 = context.Input(framework::GradVarName("Out")); - // auto* out0 = context.Output(framework::GradVarName("X")); - // auto* out1 = context.Output(framework::GradVarName("X")); + auto* in0 = context.Input("Y"); + auto* in1 = context.Input("intermediate_val"); + auto* in2 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + + if (out0) { + auto counts = framework::product(in1->dims()); + auto y_ptr = thrust::device_pointer_cast(in0->data()); + auto inter_val_ptr = thrust::device_pointer_cast(in1->data()); + auto out_grad_ptr = thrust::device_pointer_cast(in2->data()); + thrust::device_ptr x_grad_ptr( + out0->mutable_data(context.GetPlace())); + + auto iter_begin = thrust::make_zip_iterator( + thrust::make_tuple(x_grad_ptr, inter_val_ptr, y_ptr, out_grad_ptr)); + + auto iter_end = thrust::make_zip_iterator( + thrust::make_tuple(x_grad_ptr + counts, inter_val_ptr + counts, + y_ptr + counts, out_grad_ptr + counts)); + + 
thrust::for_each(iter_begin, iter_end, ModifiedHuberLossBackward()); + } } }; diff --git a/paddle/operators/modified_huber_loss_op.h b/paddle/operators/modified_huber_loss_op.h index 2a429ab2e47..13c11684afd 100644 --- a/paddle/operators/modified_huber_loss_op.h +++ b/paddle/operators/modified_huber_loss_op.h @@ -74,49 +74,31 @@ class ModifiedHuberLossKernel : public framework::OpKernel { } }; -// Use thrust lib to unify cpu and gpu // CPU backward kernel template class ModifiedHuberLossGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto* in1 = context.Input("Y"); - auto* in2 = context.Input("intermediate_val"); - auto* in3 = context.Input(framework::GradVarName("Out")); + auto* in0 = context.Input("Y"); + auto* in1 = context.Input("intermediate_val"); + auto* in2 = context.Input(framework::GradVarName("Out")); auto* out0 = context.Output(framework::GradVarName("X")); - auto* out1 = context.Output(framework::GradVarName("X")); - - // loop inter_val (x<-1) (x<1) otherwise - const T* p_inter_val = in2->data(); - const T* p_out_grad = in3->data(); - size_t counts = static_cast(framework::product(in2->dims())); if (out0) { - T* p_x_grad = out0->mutable_data(context.GetPlace()); - const T* p_y = in1->data(); - ModifiedHuberLossBackward(p_inter_val, p_y, p_out_grad, p_x_grad, counts); - } - - if (out1) { - T* p_y_grad = out1->mutable_data(context.GetPlace()); - const T* p_x = in0->data(); - ModifiedHuberLossBackward(p_inter_val, p_x, p_out_grad, p_y_grad, counts); - } - } - - protected: - void ModifiedHuberLossBackward(const T* p_inter_data, const T* p_in_data, - const T* p_in_grad, T* p_out_grad, - size_t counts) const { - for (size_t i = 0; i < counts; ++i) { - if (p_inter_data[i] < -1) { - p_out_grad[i] = -4 * p_in_data[i] * p_in_grad[i]; - } else if (p_inter_data[i] < 1) { - p_out_grad[i] = - -2 * (1 - p_inter_data[i]) * p_in_data[i] * p_in_grad[i]; - } 
else { - p_out_grad[i] = 0; + const T* y_ptr = in0->data(); + const T* inter_val_ptr = in1->data(); + const T* out_grad_ptr = in2->data(); + size_t counts = static_cast(framework::product(in1->dims())); + T* x_grad_ptr = out0->mutable_data(context.GetPlace()); + for (size_t i = 0; i < counts; ++i) { + if (inter_val_ptr[i] < -1) { + x_grad_ptr[i] = -4 * (2 * y_ptr[i] - 1) * out_grad_ptr[i]; + } else if (inter_val_ptr[i] < 1) { + x_grad_ptr[i] = -2 * (1 - inter_val_ptr[i]) * (2 * y_ptr[i] - 1) * + out_grad_ptr[i]; + } else { + x_grad_ptr[i] = 0; + } } } } diff --git a/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py new file mode 100644 index 00000000000..2b76c53b6e2 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_modified_huber_loss_op.py @@ -0,0 +1,55 @@ +import unittest +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op +from paddle.v2.framework.op import Operator +import numpy as np + + +def modified_huber_loss_forward(val): + if val < -1: + return -4 * a + elif val < 1: + return (1 - val) * (1 - val) + else: + return 0 + + +class TestModifiedHuberLossOp_f0(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'modified_huber_loss' + samples_num = 32 + self.inputs = { + 'X': np.random.uniform(-1, 1., (samples_num, 1)).astype('float32'), + 'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1)) + } + product_res = self.inputs['X'] * (2 * self.inputs['Y'] - 1) + loss = np.vectorize(modified_huber_loss_forward)(product_res) + + self.outputs = { + 'intermediate_val': product_res, + 'Out': loss.reshape((samples_num, 1)) + } + + +class TestModifiedHuberLossGradOp(GradientChecker): + def test_modified_huber_loss_b0(self): + samples_num = 10 + inputs = { + 'X': np.random.uniform(-1, 1, (samples_num, 1)).astype('float32'), + 'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1)) + } + 
op = Operator( + "modified_huber_loss", + X='X', + Y='Y', + intermediate_val='intermediate_val', + Out='Out') + self.compare_grad( + op, inputs, no_grad_set=set(['intermediate_val', 'Y'])) + self.check_grad(op, inputs, set(["X"]), "Out") + + +if __name__ == '__main__': + unittest.main() -- GitLab