Commit 98411745 authored by yangyaming

Finish modified huber loss op.

Parent: 3a49bae0
@@ -45,11 +45,25 @@ class ModifiedHuberLossOpMaker : public framework::OpProtoAndCheckerMaker {
   ModifiedHuberLossOpMaker(framework::OpProto* proto,
                            framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "");
-    AddInput("Y", "");
-    AddOutput("intermediate_val", "").AsIntermediate();
-    AddOutput("Out", "");
-    AddComment("");
+    AddInput("X", "Input value of ModifiedHuberLossOp.");
+    AddInput("Y", "Target labels of ModifiedHuberLossOp.");
+    AddOutput("intermediate_val",
+              "Variable to save intermediate result which will be reused in "
+              "backward processing.")
+        .AsIntermediate();
+    AddOutput("Out", "Classification loss for input X.");
+    AddComment(R"DOC(
+Modified huber loss is used in binary classification problems. Dimensions of
+input X and target Y are both (N, 1) and so is the dimension of the output loss.
+Since target Y is not differentiable, calculating the gradient for Y is illegal.
+The formulation of modified huber loss is:
+L(y, f(x)) = max(0, 1 - yf(x))^2  for yf(x) >= -1,
+             -4yf(x)              otherwise.
+Make sure the values of target label Y are in {0, 1} here. The operator will
+scale values of Y to {-1, +1} when computing loss and gradients.
+)DOC");
   }
 };
@@ -64,7 +78,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
     auto* intermediate_val = context.Input<Tensor>("intermediate_val");
     auto* out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* x_grad = context.Output<Tensor>(framework::GradVarName("X"));
-    auto* y_grad = context.Output<Tensor>(framework::GradVarName("Y"));
     PADDLE_ENFORCE_NOT_NULL(x, "Input X must not be null.");
     PADDLE_ENFORCE_NOT_NULL(y, "Target Y must not be null.");
@@ -80,7 +93,6 @@ class ModifiedHuberLossGradOp : public framework::OperatorWithKernel {
         "Dimension of Out gradient and X must be the same (N*1).");
     if (x_grad) x_grad->Resize(x->dims());
-    if (y_grad) y_grad->Resize(y->dims());
   }
 };
......
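For reference, a minimal NumPy sketch (not part of this commit; names are illustrative) of the loss stated in the DOC block above and the X-gradient it implies. The factor 2 * y - 1 is the {0, 1} to {-1, +1} label rescaling used by both backward kernels below:

import numpy as np

def modified_huber_loss(x, y01):
    # x: model output f(x), shape (N, 1); y01: labels in {0, 1}, same shape
    y = 2 * y01 - 1                        # rescale labels to {-1, +1}
    inter = y * x                          # intermediate value y * f(x), cached for backward
    loss = np.where(inter < -1, -4.0 * inter, np.maximum(0.0, 1.0 - inter) ** 2)
    return inter, loss

def modified_huber_loss_x_grad(inter, y01, out_grad):
    # dL/dx, matching the three branches of the CPU and GPU backward kernels
    y = 2 * y01 - 1
    grad = np.where(inter < -1, -4.0 * y,
                    np.where(inter < 1, -2.0 * (1.0 - inter) * y, 0.0))
    return grad * out_grad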
@@ -9,24 +9,61 @@
    See the License for the specific language governing permissions and
    limitations under the License. */
 
+#include <thrust/device_ptr.h>
+#include <thrust/device_vector.h>
+#include <thrust/for_each.h>
+#include <thrust/tuple.h>
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/modified_huber_loss_op.h"
+#include "paddle/platform/hostdevice.h"
 
 namespace paddle {
 namespace operators {
 
 using Tensor = framework::Tensor;
 
+struct ModifiedHuberLossBackward {
+  template <typename Tuple>
+  HOSTDEVICE void operator()(Tuple t) const {
+    auto inter_val = thrust::get<1>(t);
+    auto y_val = thrust::get<2>(t);
+    auto out_grad = thrust::get<3>(t);
+    if (inter_val < -1) {
+      thrust::get<0>(t) = -4 * (2 * y_val - 1) * out_grad;
+    } else if (inter_val < 1) {
+      thrust::get<0>(t) = -2 * (1 - inter_val) * (2 * y_val - 1) * out_grad;
+    } else {
+      thrust::get<0>(t) = 0;
+    }
+  }
+};
+
 template <typename T>
 class ModifiedHuberLossGradGPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    // auto* in0 = context.Input<Tensor>("X");
-    // auto* in1 = context.Input<Tensor>("Y");
-    // auto* in2 = context.Input<Tensor>("intermediate_val");
-    // auto* in3 = context.Input<Tensor>(framework::GradVarName("Out"));
-    // auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
-    // auto* out1 = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* in0 = context.Input<Tensor>("Y");
+    auto* in1 = context.Input<Tensor>("intermediate_val");
+    auto* in2 = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
+
+    if (out0) {
+      auto counts = framework::product(in1->dims());
+      auto y_ptr = thrust::device_pointer_cast(in0->data<T>());
+      auto inter_val_ptr = thrust::device_pointer_cast(in1->data<T>());
+      auto out_grad_ptr = thrust::device_pointer_cast(in2->data<T>());
+      thrust::device_ptr<T> x_grad_ptr(
+          out0->mutable_data<T>(context.GetPlace()));
+      auto iter_begin = thrust::make_zip_iterator(
+          thrust::make_tuple(x_grad_ptr, inter_val_ptr, y_ptr, out_grad_ptr));
+      auto iter_end = thrust::make_zip_iterator(
+          thrust::make_tuple(x_grad_ptr + counts, inter_val_ptr + counts,
+                             y_ptr + counts, out_grad_ptr + counts));
+      thrust::for_each(iter_begin, iter_end, ModifiedHuberLossBackward());
+    }
   }
 };
......
@@ -74,49 +74,31 @@ class ModifiedHuberLossKernel : public framework::OpKernel {
   }
 };
 
-// Use thrust lib to unify cpu and gpu
 // CPU backward kernel
 template <typename T>
 class ModifiedHuberLossGradCPUKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in0 = context.Input<Tensor>("X");
-    auto* in1 = context.Input<Tensor>("Y");
-    auto* in2 = context.Input<Tensor>("intermediate_val");
-    auto* in3 = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
-    auto* out1 = context.Output<Tensor>(framework::GradVarName("X"));
-
-    // loop inter_val (x<-1) (x<1) otherwise
-    const T* p_inter_val = in2->data<T>();
-    const T* p_out_grad = in3->data<T>();
-    size_t counts = static_cast<size_t>(framework::product(in2->dims()));
-
-    if (out0) {
-      T* p_x_grad = out0->mutable_data<T>(context.GetPlace());
-      const T* p_y = in1->data<T>();
-      ModifiedHuberLossBackward(p_inter_val, p_y, p_out_grad, p_x_grad, counts);
-    }
-
-    if (out1) {
-      T* p_y_grad = out1->mutable_data<T>(context.GetPlace());
-      const T* p_x = in0->data<T>();
-      ModifiedHuberLossBackward(p_inter_val, p_x, p_out_grad, p_y_grad, counts);
-    }
-  }
-
- protected:
-  void ModifiedHuberLossBackward(const T* p_inter_data, const T* p_in_data,
-                                 const T* p_in_grad, T* p_out_grad,
-                                 size_t counts) const {
-    for (size_t i = 0; i < counts; ++i) {
-      if (p_inter_data[i] < -1) {
-        p_out_grad[i] = -4 * p_in_data[i] * p_in_grad[i];
-      } else if (p_inter_data[i] < 1) {
-        p_out_grad[i] =
-            -2 * (1 - p_inter_data[i]) * p_in_data[i] * p_in_grad[i];
-      } else {
-        p_out_grad[i] = 0;
-      }
+    auto* in0 = context.Input<Tensor>("Y");
+    auto* in1 = context.Input<Tensor>("intermediate_val");
+    auto* in2 = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
+
+    if (out0) {
+      const T* y_ptr = in0->data<T>();
+      const T* inter_val_ptr = in1->data<T>();
+      const T* out_grad_ptr = in2->data<T>();
+      size_t counts = static_cast<size_t>(framework::product(in1->dims()));
+      T* x_grad_ptr = out0->mutable_data<T>(context.GetPlace());
+      for (size_t i = 0; i < counts; ++i) {
+        if (inter_val_ptr[i] < -1) {
+          x_grad_ptr[i] = -4 * (2 * y_ptr[i] - 1) * out_grad_ptr[i];
+        } else if (inter_val_ptr[i] < 1) {
+          x_grad_ptr[i] = -2 * (1 - inter_val_ptr[i]) * (2 * y_ptr[i] - 1) *
+                          out_grad_ptr[i];
+        } else {
+          x_grad_ptr[i] = 0;
+        }
+      }
     }
   }
......
import unittest
from op_test_util import OpTestMeta
from gradient_checker import GradientChecker, create_op
from paddle.v2.framework.op import Operator
import numpy as np


def modified_huber_loss_forward(val):
    if val < -1:
        return -4 * val
    elif val < 1:
        return (1 - val) * (1 - val)
    else:
        return 0


class TestModifiedHuberLossOp_f0(unittest.TestCase):
    __metaclass__ = OpTestMeta

    def setUp(self):
        self.type = 'modified_huber_loss'
        samples_num = 32
        self.inputs = {
            'X': np.random.uniform(-1, 1., (samples_num, 1)).astype('float32'),
            'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1))
        }
        product_res = self.inputs['X'] * (2 * self.inputs['Y'] - 1)
        loss = np.vectorize(modified_huber_loss_forward)(product_res)
        self.outputs = {
            'intermediate_val': product_res,
            'Out': loss.reshape((samples_num, 1))
        }


class TestModifiedHuberLossGradOp(GradientChecker):
    def test_modified_huber_loss_b0(self):
        samples_num = 10
        inputs = {
            'X': np.random.uniform(-1, 1, (samples_num, 1)).astype('float32'),
            'Y': np.random.choice([0, 1], samples_num).reshape((samples_num, 1))
        }
        op = Operator(
            "modified_huber_loss",
            X='X',
            Y='Y',
            intermediate_val='intermediate_val',
            Out='Out')
        self.compare_grad(
            op, inputs, no_grad_set=set(['intermediate_val', 'Y']))
        self.check_grad(op, inputs, set(["X"]), "Out")


if __name__ == '__main__':
    unittest.main()
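Outside the framework's GradientChecker, the analytic X-gradient can also be sanity-checked against central finite differences. A standalone sketch (not part of the commit; helper names are hypothetical):

import numpy as np

def _loss(x, y01):
    inter = (2 * y01 - 1) * x
    return np.where(inter < -1, -4.0 * inter, np.maximum(0.0, 1.0 - inter) ** 2)

def _x_grad(x, y01):
    y = 2 * y01 - 1
    inter = y * x
    return np.where(inter < -1, -4.0 * y,
                    np.where(inter < 1, -2.0 * (1.0 - inter) * y, 0.0))

if __name__ == '__main__':
    x = np.random.uniform(-2, 2, (16, 1))
    y01 = np.random.choice([0, 1], (16, 1)).astype('float64')
    eps = 1e-6
    numeric = (_loss(x + eps, y01) - _loss(x - eps, y01)) / (2 * eps)
    # the loss is C1 (derivatives match at yf(x) = -1 and 1), so the two should agree
    assert np.allclose(numeric, _x_grad(x, y01), atol=1e-4)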