Unverified · Commit 96dc3d83 authored by jerrywgz, committed by GitHub

Merge pull request #14511 from jerrywgz/ignore_index_for_sigmoid_cross_entropy

add ignore index for sigmoid cross entropy with logits op, test=develop
@@ -182,7 +182,7 @@ paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
-paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name'], varargs=None, keywords=None, defaults=(-100, None))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
...
@@ -18,6 +18,7 @@ namespace paddle {
namespace operators {

using framework::Tensor;
+const int kIgnoreIndex = -100;

class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
 public:
...
@@ -100,6 +101,11 @@ class SigmoidCrossEntropyWithLogitsOpMaker
    AddOutput("Out",
              "(Tensor, default Tensor<float>), a 2-D tensor with shape N x D "
              "of elementwise logistic losses.");
+    AddAttr<int>("ignore_index",
+                 "(int, default kIgnoreIndex), Specifies a target value that "
+                 "is ignored and "
+                 "does not contribute to the input gradient.")
+        .SetDefault(kIgnoreIndex);
    AddComment(R"DOC(
SigmoidCrossEntropyWithLogits Operator.
...
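The new attribute follows the same `ignore_index` convention as `cross_entropy`: any element whose label equals the attribute value contributes neither loss nor gradient. A minimal NumPy sketch of that masking semantics (illustrative only; `masked_sigmoid_ce` is a hypothetical helper, not part of this commit):

import numpy as np

def masked_sigmoid_ce(x, label, ignore_index=-100):
    # Numerically stable elementwise loss:
    # max(x, 0) - x * label + log(1 + exp(-|x|))
    loss = np.maximum(x, 0) - x * label + np.log1p(np.exp(-np.abs(x)))
    # Entries whose label equals ignore_index contribute nothing.
    loss[label == ignore_index] = 0.0
    return loss

x = np.array([1.5, -0.3, 2.0], dtype=np.float32)
label = np.array([1.0, -1.0, 0.0], dtype=np.float32)  # -1 marks ignored targets
print(masked_sigmoid_ce(x, label, ignore_index=-1))   # approx [0.2014, 0.0, 2.1269]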
@@ -15,33 +15,72 @@ limitations under the License. */

#pragma once

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/platform/hostdevice.h"
+#include "paddle/legacy/utils/Logging.h"

namespace paddle {
namespace operators {

+using Tensor = framework::Tensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+
+template <typename T>
+struct SigmoidCrossEntropyWithLogitsForward {
+  HOSTDEVICE SigmoidCrossEntropyWithLogitsForward(const int &ignore_index)
+      : ignore_index(ignore_index) {}
+
+  HOSTDEVICE T operator()(const T &x, const T &label) const {
+    if (static_cast<int>(label) == ignore_index) {
+      return static_cast<T>(0.);
+    }
+    T term1 = (x > 0) ? x : 0;
+    T term2 = x * label;
+    T term3 = std::log(static_cast<T>(1) + std::exp(-(std::abs(x))));
+    return term1 - term2 + term3;
+  }
+
+  int ignore_index;
+};
+
+template <typename T>
+struct SigmoidCrossEntropyWithLogitsBackward {
+  HOSTDEVICE SigmoidCrossEntropyWithLogitsBackward(const int &ignore_index)
+      : ignore_index(ignore_index) {}
+
+  HOSTDEVICE T operator()(const T &x, const T &label) const {
+    if (static_cast<int>(label) == ignore_index) {
+      return static_cast<T>(0.);
+    }
+    T sigmoid_x = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-x));
+    return sigmoid_x - label;
+  }
+
+  int ignore_index;
+};
+
// Out = max(X, 0) - X * Labels + log(1 + exp(-abs(X)))
template <typename DeviceContext, typename T>
class SigmoidCrossEntropyWithLogitsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const framework::Tensor *X = context.Input<framework::Tensor>("X");
-    const framework::Tensor *Labels = context.Input<framework::Tensor>("Label");
-    framework::Tensor *Out = context.Output<framework::Tensor>("Out");
+    const Tensor *X = context.Input<Tensor>("X");
+    const Tensor *Labels = context.Input<Tensor>("Label");
+    Tensor *Out = context.Output<Tensor>("Out");
    Out->mutable_data<T>(context.GetPlace());
+    int ignore_index = context.Attr<int>("ignore_index");

-    auto x = framework::EigenVector<T>::Flatten(*X);
-    auto labels = framework::EigenVector<T>::Flatten(*Labels);
-    auto out = framework::EigenVector<T>::Flatten(*Out);
+    auto x = EigenVector<T>::Flatten(*X);
+    auto labels = EigenVector<T>::Flatten(*Labels);
+    auto out = EigenVector<T>::Flatten(*Out);
    auto &place = *context.device_context<DeviceContext>().eigen_device();

-    // term1 = max(x, 0)
-    auto term1 = x.cwiseMax(static_cast<T>(0));
-    // term2 = x * labels
-    auto term2 = x * labels;
-    // term3 = log(1 + exp(-abs(x)))
-    auto term3 = (static_cast<T>(1) + (-(x.abs())).exp()).log();
-    out.device(place) = term1 - term2 + term3;
+    out.device(place) = x.binaryExpr(
+        labels, SigmoidCrossEntropyWithLogitsForward<T>(ignore_index));
  }
};

...
@@ -50,23 +89,23 @@ template <typename DeviceContext, typename T>
class SigmoidCrossEntropyWithLogitsGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const framework::Tensor *X = context.Input<framework::Tensor>("X");
-    const framework::Tensor *Labels = context.Input<framework::Tensor>("Label");
-    const framework::Tensor *dOut =
-        context.Input<framework::Tensor>(framework::GradVarName("Out"));
-    framework::Tensor *dX =
-        context.Output<framework::Tensor>(framework::GradVarName("X"));
+    const Tensor *X = context.Input<Tensor>("X");
+    const Tensor *Labels = context.Input<Tensor>("Label");
+    const Tensor *dOut = context.Input<Tensor>(framework::GradVarName("Out"));
+    Tensor *dX = context.Output<Tensor>(framework::GradVarName("X"));
    dX->mutable_data<T>(context.GetPlace());

-    auto x = framework::EigenVector<T>::Flatten(*X);
-    auto labels = framework::EigenVector<T>::Flatten(*Labels);
-    auto dout = framework::EigenVector<T>::Flatten(*dOut);
-    auto dx = framework::EigenVector<T>::Flatten(*dX);
+    auto ignore_index = context.Attr<int>("ignore_index");
+    auto x = EigenVector<T>::Flatten(*X);
+    auto labels = EigenVector<T>::Flatten(*Labels);
+    auto dout = EigenVector<T>::Flatten(*dOut);
+    auto dx = EigenVector<T>::Flatten(*dX);
    auto &place =
        *context.template device_context<DeviceContext>().eigen_device();

-    auto sigmoid_x = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
-    dx.device(place) = dout * (sigmoid_x - labels);
+    auto diff = x.binaryExpr(labels, SigmoidCrossEntropyWithLogitsBackward<T>(
+                                         static_cast<int>(ignore_index)));
+    dx.device(place) = dout * diff;
  }
};
...
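For reference, the forward functor is the standard numerically stable rewrite of the logistic loss, and the backward functor is its derivative. A quick NumPy check of both identities (a sketch assuming 0/1 labels and no ignored entries; not part of the commit):

import numpy as np

x = np.random.randn(1000)
label = np.random.randint(0, 2, 1000).astype(np.float64)

sig = 1.0 / (1.0 + np.exp(-x))
naive = -label * np.log(sig) - (1 - label) * np.log(1 - sig)
stable = np.maximum(x, 0) - x * label + np.log1p(np.exp(-np.abs(x)))
assert np.allclose(naive, stable)  # same loss, but no overflow for large |x|

grad = sig - label  # matches SigmoidCrossEntropyWithLogitsBackward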
@@ -172,6 +172,8 @@ __all__ = [
    'lstm',
]

+kIgnoreIndex = -100
+

def fc(input,
       size,
...
@@ -1267,7 +1269,7 @@ def dropout(x,
    return out


-def cross_entropy(input, label, soft_label=False, ignore_index=-100):
+def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
    """
    **Cross Entropy Layer**
...
@@ -1314,7 +1316,7 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
                            labels. Default: `False`.
        ignore_index (int): Specifies a target value that is ignored and does
                            not contribute to the input gradient. Only valid
-                            if soft_label is set to False. Default: -100
+                            if soft_label is set to False. Default: kIgnoreIndex

    Returns:
        A 2-D tensor with shape [N x 1], the cross entropy loss.
...
@@ -5185,7 +5187,7 @@ def multiplex(inputs, index):
def softmax_with_cross_entropy(logits,
                               label,
                               soft_label=False,
-                              ignore_index=-100,
+                              ignore_index=kIgnoreIndex,
                               numeric_stable_mode=False,
                               return_softmax=False):
    """
...
@@ -5243,7 +5245,7 @@ def softmax_with_cross_entropy(logits,
            labels as soft labels. By default, `soft_label` is set to False.
        ignore_index (int): Specifies a target value that is ignored and does
                            not contribute to the input gradient. Only valid
                            if soft_label is set to False. Default: kIgnoreIndex
        numeric_stable_mode (bool): A flag to indicate whether to use a more
                                    numerically stable algorithm. Only valid
                                    when soft_label is False and GPU is used.
...
@@ -8415,13 +8417,17 @@ def mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None):
@templatedoc()
-def sigmoid_cross_entropy_with_logits(x, label, name=None):
+def sigmoid_cross_entropy_with_logits(x,
+                                      label,
+                                      ignore_index=kIgnoreIndex,
+                                      name=None):
    """
    ${comment}

    Args:
        x(${x_type}): ${x_comment}
        label(${label_type}): ${label_comment}
+        ignore_index(int): ${ignore_index_comment}
        name(basestring|None): Name of the output.

    Returns:
...
@@ -8440,7 +8446,7 @@ def sigmoid_cross_entropy_with_logits(x, label, name=None):
        type="sigmoid_cross_entropy_with_logits",
        inputs={"X": x,
                "Label": label},
-        attrs={},
+        attrs={"ignore_index": ignore_index},
        outputs={"Out": out})
    return out
...
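With the attribute threaded through `append_op`, callers can exclude padded targets directly at the layer level. A short usage sketch (variable names are illustrative; mirrors the updated unit test below):

import paddle.fluid as fluid

x = fluid.layers.data(name='logits', shape=[10], dtype='float32')
label = fluid.layers.data(name='label', shape=[10], dtype='float32')
# Elements whose label equals -1 contribute neither loss nor gradient.
loss = fluid.layers.sigmoid_cross_entropy_with_logits(
    x=x, label=label, ignore_index=-1)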
@@ -170,9 +170,10 @@ class TestBook(unittest.TestCase):
        with program_guard(program):
            dat = layers.data(name='data', shape=[10], dtype='float32')
            lbl = layers.data(name='label', shape=[10], dtype='float32')
+            ignore_index = -1
            self.assertIsNotNone(
                layers.sigmoid_cross_entropy_with_logits(
-                    x=dat, label=lbl))
+                    x=dat, label=lbl, ignore_index=ignore_index))
        print(str(program))

    def test_hsigmoid(self):
...
@@ -56,6 +56,40 @@ class TestSigmoidCrossEntropyWithLogitsOp2(OpTest):
    """Test sigmoid_cross_entropy_with_logit_op with probabilistic label
    """

+    def setUp(self):
+        self.op_type = "sigmoid_cross_entropy_with_logits"
+        batch_size = 64
+        num_classes = 20
+        ignore_index = -1
+        self.inputs = {
+            'X': logit(
+                np.random.uniform(0, 1, (batch_size, num_classes))
+                .astype("float32")),
+            'Label': np.random.randint(-1, 2, (batch_size, num_classes))
+            .astype("float32")
+        }
+        self.attrs = {'ignore_index': ignore_index, }
+        # Fw Pass is implemented as elementwise sigmoid followed by
+        # elementwise logistic loss
+        # Label * -log(sigmoid(X)) + (1 - label) * -log(1 - sigmoid(X))
+        sigmoid_X = expit(self.inputs['X'])
+        term1 = self.inputs['Label'] * np.log(sigmoid_X)
+        term2 = (1 - self.inputs['Label']) * np.log(1 - sigmoid_X)
+        out = -term1 - term2
+        out[np.where(self.inputs['Label'] == ignore_index)] = 0
+        self.outputs = {'Out': out}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+class TestSigmoidCrossEntropyWithLogitsOp3(OpTest):
+    """Test sigmoid_cross_entropy_with_logit_op with probabilistic label
+    """
+
    def setUp(self):
        self.op_type = "sigmoid_cross_entropy_with_logits"
        batch_size = 64
...
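The new test only asserts the masked forward output; the `check_grad` call exercises the backward masking numerically. A sketch of the reference gradient it implies (mirrors the backward functor; `expected_grad` is a hypothetical helper, not part of the test):

import numpy as np
from scipy.special import expit

def expected_grad(x, label, dout, ignore_index=-1):
    diff = expit(x) - label            # sigmoid(x) - label
    diff[label == ignore_index] = 0.0  # ignored targets get zero gradient
    return dout * diff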