Unverified commit f276006f, authored by fengjiayi, committed by GitHub

Merge pull request #12694 from JiayiFeng/dev_op_tensor_support

Make cross_entropy_op support high-rank tensors
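For orientation, here is a minimal numpy sketch (not part of the diff; the shapes are invented for illustration) of the shape contract this change introduces: inputs may now have any rank >= 2, the last dimension is the class dimension, and the loss keeps every leading dimension while the last one collapses to 1.

import numpy as np

# Hypothetical high-rank input: the last dimension holds class probabilities.
X = np.random.dirichlet(np.ones(7), size=(10, 2, 4))          # shape [10, 2, 4, 7]
label = np.random.randint(0, 7, size=(10, 2, 4, 1))           # hard labels, last dim = 1

# The expected output Y keeps X's shape except that the last dimension becomes 1.
Y = -np.log(np.take_along_axis(X, label, axis=-1))             # shape [10, 2, 4, 1]
assert Y.shape == (10, 2, 4, 1)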
@@ -112,5 +112,6 @@ Tensor& Tensor::Resize(const DDim& dims) {
 const DDim& Tensor::dims() const { return dims_; }
 int64_t Tensor::numel() const { return product(dims_); }
 }  // namespace framework
 }  // namespace paddle
@@ -59,6 +59,14 @@ inline T* Tensor::mutable_data(platform::Place place) {
 }
 inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
+  int rank = src.dims().size();
+  PADDLE_ENFORCE_GE(
+      rank, 2,
+      "'ReshapeToMatrix()' is only used to flatten high-rank tensors to "
+      "matrices. It can not be used to reshape vectors.");
+  if (rank == 2) {
+    return src;
+  }
   Tensor res;
   res.ShareDataWith(src);
   res.Resize(flatten_to_2d(src.dims(), num_col_dims));
......
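As a rough analogue of what the extended ReshapeToMatrix (and flatten_to_2d) do, here is a hedged numpy sketch; reshape_to_matrix and its argument names are made up for illustration, and the rank == 2 early return mirrors the new short-circuit above.

import numpy as np

def reshape_to_matrix(arr, num_col_dims):
    """Rough numpy analogue (assumption: mirrors flatten_to_2d's split point).

    Dims [0, num_col_dims) are folded into the row count, the remaining dims
    into the column count. With num_col_dims == rank - 1 the last dimension is
    kept intact, which is how the kernels in this diff use it.
    """
    if arr.ndim == 2:
        return arr
    shape = arr.shape
    rows = int(np.prod(shape[:num_col_dims]))
    cols = int(np.prod(shape[num_col_dims:]))
    return arr.reshape(rows, cols)

x = np.zeros((10, 2, 4, 7))
assert reshape_to_matrix(x, x.ndim - 1).shape == (80, 7)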
@@ -28,23 +28,26 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("X");
     auto label_dims = ctx->GetInputDim("Label");
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "Input(X)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(label_dims.size(), 2UL,
-                      "Input(Label)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(x_dims[0], label_dims[0],
-                      "The 1st dimension of Input(X) and Input(Label) should "
-                      "be equal.");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(rank, label_dims.size(),
+                      "Input(X) and Input(Label) shall have the same rank.");
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                      framework::slice_ddim(label_dims, 0, rank - 1),
+                      "Input(X) and Input(Label) shall have the same shape "
+                      "except the last dimension.");
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(x_dims[1], label_dims[1],
-                        "If Attr(soft_label) == true, the 2nd dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                        "If Attr(soft_label) == true, the last dimension of "
+                        "Input(X) and Input(Label) should be equal.");
     } else {
-      PADDLE_ENFORCE_EQ(label_dims[1], 1UL,
-                        "If Attr(softLabel) == false, the 2nd dimension of "
-                        "Input(Label) should be 1.");
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1UL,
+                        "If Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
     }
-    ctx->SetOutputDim("Y", {x_dims[0], 1});
+    auto y_dims = x_dims;
+    y_dims[rank - 1] = 1;
+    ctx->SetOutputDim("Y", y_dims);
     ctx->ShareLoD("X", /*->*/ "Y");
   }
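A small Python sketch (illustrative only; infer_cross_entropy_y_shape is a made-up helper) of the relaxed shape inference above: every dimension except the last must match between X and Label, and Y keeps X's leading dimensions with a trailing 1.

def infer_cross_entropy_y_shape(x_dims, label_dims, soft_label):
    # Mirrors the checks above on plain Python lists (not the real InferShape).
    rank = len(x_dims)
    assert rank == len(label_dims), "X and Label must have the same rank"
    assert x_dims[:-1] == label_dims[:-1], "all but the last dim must match"
    if soft_label:
        assert x_dims[-1] == label_dims[-1]
    else:
        assert label_dims[-1] == 1
    return x_dims[:-1] + [1]

assert infer_cross_entropy_y_shape([10, 2, 4, 7], [10, 2, 4, 1], False) == [10, 2, 4, 1]
assert infer_cross_entropy_y_shape([10, 2, 4, 7], [10, 2, 4, 7], True) == [10, 2, 4, 1]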
@@ -74,24 +77,28 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
     auto x_dims = ctx->GetInputDim("X");
     auto label_dims = ctx->GetInputDim("Label");
     auto dy_dims = ctx->GetInputDim(framework::GradVarName("Y"));
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(dy_dims.size(), 2, "Input(Y@Grad)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(label_dims.size(), 2, "Input(Label)'s rank should be 2.");
-    PADDLE_ENFORCE_EQ(x_dims[0], label_dims[0],
-                      "The 1st dimension of Input(X) and Input(Label) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(x_dims[0], dy_dims[0],
-                      "The 1st dimension of Input(X) and Input(Y@Grad) should "
-                      "be equal.");
-    PADDLE_ENFORCE_EQ(dy_dims[1], 1,
-                      "The 2nd dimension of Input(Y@Grad) should be 1.");
+    int rank = x_dims.size();
+    PADDLE_ENFORCE_EQ(dy_dims.size(), rank,
+                      "Input(Y@Grad) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(label_dims.size(), rank,
+                      "Input(Label) and Input(X) should have the same rank.");
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                      framework::slice_ddim(label_dims, 0, rank - 1),
+                      "The Input(X) and Input(Label) should have the same "
+                      "shape except the last dimension.");
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(x_dims, 0, rank - 1),
+                      framework::slice_ddim(dy_dims, 0, rank - 1),
+                      "The Input(X) and Input(Y@Grad) should have the same "
+                      "shape except the last dimension.");
+    PADDLE_ENFORCE_EQ(dy_dims[rank - 1], 1,
+                      "The last dimension of Input(Y@Grad) should be 1.");
     if (ctx->Attrs().Get<bool>("soft_label")) {
-      PADDLE_ENFORCE_EQ(x_dims[1], label_dims[1],
-                        "When Attr(soft_label) == true, the 2nd dimension of "
-                        "Input(X) and Input(Label) should be equal.");
+      PADDLE_ENFORCE_EQ(x_dims[rank - 1], label_dims[rank - 1],
+                        "When Attr(soft_label) == true, the last dimension of "
+                        "Input(X) and Input(Label) should be equal.");
     } else {
-      PADDLE_ENFORCE_EQ(label_dims[1], 1,
-                        "When Attr(soft_label) == false, the 2nd dimension of "
-                        "Input(Label) should be 1.");
+      PADDLE_ENFORCE_EQ(label_dims[rank - 1], 1,
+                        "When Attr(soft_label) == false, the last dimension of "
+                        "Input(Label) should be 1.");
     }
     ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
@@ -113,18 +120,20 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(Tensor, default Tensor<float>), a 2-D tensor with shape [N x D],"
-             " where N is the batch size and D is the number of classes. "
-             "This input is a probability computed by the previous operator, "
-             "which is almost always the result of a softmax operator.");
+             "(Tensor, default Tensor<float>), a tensor whose last dimension "
+             "size is equal to the number of classes. This input is a "
+             "probability computed by the previous operator, which is almost "
+             "always the result of a softmax operator.");
-    AddInput("Label",
-             "(Tensor), the ground truth which is a 2-D tensor. When "
-             "soft_label is set to false, Label is a Tensor<int64> with shape "
-             "[N x 1]. When soft_label is set to true, Label is a "
-             "Tensor<float/double> with shape [N x D].");
+    AddInput(
+        "Label",
+        "(Tensor), the tensor which represents the ground truth. It has the "
+        "same shape as 'X' except for the last dimension. When soft_label is "
+        "set to false, the last dimension size is 1; when soft_label is set "
+        "to true, the last dimension size is equal to the number of classes.");
     AddOutput("Y",
-              "(Tensor, default Tensor<float>), a 2-D tensor with shape "
-              "[N x 1]. The cross entropy loss.");
+              "(Tensor, default Tensor<float>), a tensor whose shape is the "
+              "same as 'X' except that the last dimension size is 1. It "
+              "represents the cross entropy loss.");
     AddAttr<bool>("soft_label",
                   "(bool, default false), a flag indicating whether to "
                   "interpretate the given labels as soft labels.")
@@ -132,6 +141,12 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 CrossEntropy Operator.
+
+The input 'X' and 'Label' will first be logically flattened to 2-D matrices.
+The matrix's second dimension (row length) is the same as the last dimension
+of the original tensors, and the first dimension (column length) is the
+product of all the other original dimensions. The cross entropy computation
+then takes place on each row of the flattened matrices.
+
 It supports both standard cross-entropy and soft-label cross-entropy loss
 computation.
 1) One-hot cross-entropy:
......
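The flattening described in the doc comment can be made concrete with a numpy sketch (a reference computation under the stated shape assumptions, not the operator's actual kernel): flatten everything but the class dimension, compute the per-row loss, then restore the leading shape.

import numpy as np

def cross_entropy_highrank(X, label, soft_label=False):
    """Reference cross entropy on a flattened 2-D view (a sketch only).

    Hard labels are integer indices with a trailing dimension of size 1;
    soft labels share X's shape.
    """
    lead_shape = X.shape[:-1]              # every dim except the class dim
    class_num = X.shape[-1]
    X_2d = X.reshape(-1, class_num)
    if soft_label:
        label_2d = label.reshape(-1, class_num)
        loss = -(label_2d * np.log(X_2d)).sum(axis=1, keepdims=True)
    else:
        label_2d = label.reshape(-1, 1)
        rows = np.arange(X_2d.shape[0])
        loss = -np.log(X_2d[rows, label_2d[:, 0]]).reshape(-1, 1)
    return loss.reshape(lead_shape + (1,))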
@@ -33,8 +33,13 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
     auto* y = ctx.Output<Tensor>("Y");
     y->mutable_data<T>(ctx.GetPlace());
+    int rank = x->dims().size();
+    Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
+    Tensor labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
+    Tensor y_2d = framework::ReshapeToMatrix(*y, rank - 1);
+
     math::CrossEntropyFunctor<DeviceContext, T>()(
-        ctx.template device_context<DeviceContext>(), y, x, labels,
+        ctx.template device_context<DeviceContext>(), &y_2d, &x_2d, &labels_2d,
         ctx.Attr<bool>("soft_label"));
   }
 };
@@ -98,9 +103,12 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
     auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
     auto* label = ctx.Input<Tensor>("Label");
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dx_data = dx->mutable_data<T>(ctx.GetPlace());
+    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    int64_t class_num = x->dims()[1];
+    // The following computation only depends on the last dimension size, so
+    // it is unnecessary to convert the tensors to 2-D views.
+    int rank = x->dims().size();
+    int64_t class_num = x->dims()[rank - 1];
     if (ctx.Attr<bool>("soft_label")) {
       XeSoftlabelGradFunctor<T> functor(dx_data, dy->data<T>(), x->data<T>(),
                                         label->data<T>(),
......
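For reference, the backward functors only index the class (last) dimension, which is why the comment above says no 2-D view is needed. A hedged numpy sketch of the underlying math follows (the real XeSoftlabelGradFunctor and CUDA kernels differ in implementation detail):

import numpy as np

def cross_entropy_grad(X, label, dY, soft_label=False):
    """dL/dX for Y = cross_entropy(X, label); a sketch of the math only.

    dY has X's shape with a trailing 1; hard labels are integer indices with
    a trailing dimension of size 1.
    """
    if soft_label:
        # Y = -sum_j label_j * log(x_j)  =>  dY/dx_j = -label_j / x_j
        return dY * (-label / X)
    # Only the column of the true class receives a nonzero gradient.
    dX = np.zeros_like(X)
    picked = np.take_along_axis(X, label, axis=-1)[..., 0]
    grad = -dY[..., 0] / picked
    np.put_along_axis(dX, label, grad[..., None], axis=-1)
    return dX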
@@ -31,16 +31,12 @@ class SoftmaxKernel : public framework::OpKernel<T> {
     // allocate memory on device.
     Out->mutable_data<T>(context.GetPlace());
-    auto dims = X->dims();
-    auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
-    framework::LoDTensor flattened_x;
-    framework::LoDTensor flattened_out;
-    flattened_x.ShareDataWith(*X).Resize(flattened_dims);
-    flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
+    int rank = X->dims().size();
+    Tensor X_2d = framework::ReshapeToMatrix(*X, rank - 1);
+    Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
     math::SoftmaxFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), &flattened_x,
-        &flattened_out);
+        context.template device_context<DeviceContext>(), &X_2d, &Out_2d);
   }
 };
@@ -55,18 +51,14 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
     // allocate memory on device.
     dX->mutable_data<T>(context.GetPlace());
-    auto dims = Out->dims();
-    auto flattened_dims = framework::flatten_to_2d(dims, dims.size() - 1);
-    framework::LoDTensor flattened_out;
-    framework::LoDTensor flattened_d_out;
-    framework::LoDTensor flattened_d_x;
-    flattened_out.ShareDataWith(*Out).Resize(flattened_dims);
-    flattened_d_out.ShareDataWith(*dOut).Resize(flattened_dims);
-    flattened_d_x.ShareDataWith(*dX).Resize(flattened_dims);
+    int rank = Out->dims().size();
+    Tensor Out_2d = framework::ReshapeToMatrix(*Out, rank - 1);
+    Tensor dOut_2d = framework::ReshapeToMatrix(*dOut, rank - 1);
+    Tensor dX_2d = framework::ReshapeToMatrix(*dX, rank - 1);
     math::SoftmaxGradFunctor<DeviceContext, T>()(
-        context.template device_context<DeviceContext>(), &flattened_out,
-        &flattened_d_out, &flattened_d_x);
+        context.template device_context<DeviceContext>(), &Out_2d, &dOut_2d,
+        &dX_2d);
   }
 };
......
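Numerically, the softmax kernels above still compute a row-wise softmax over the flattened view, which is the same as a softmax along the last axis of the original tensor. A small numpy sketch of that equivalence (softmax_last_axis is a made-up helper):

import numpy as np

def softmax_last_axis(x):
    # Numerically stable softmax along the last axis.
    shifted = x - x.max(axis=-1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=-1, keepdims=True)

x = np.random.rand(4, 3, 2, 5)
flat = softmax_last_axis(x.reshape(-1, x.shape[-1])).reshape(x.shape)
assert np.allclose(flat, softmax_last_axis(x))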
@@ -105,5 +105,107 @@ class TestCrossEntropyOp3(OpTest):
             ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
+
+
+class TestCrossEntropyOp4(OpTest):
+    """Test high rank tensor cross-entropy with discrete one-hot labels.
+    """
+
+    def setUp(self):
+        self.op_type = "cross_entropy"
+        shape = [10, 2, 4]
+        ins_num = np.prod(np.array(shape))
+        class_num = 10
+
+        X_2d = randomize_probability(ins_num, class_num, dtype='float64')
+
+        label_2d = np.random.randint(0, class_num, (ins_num, 1), dtype="int64")
+        cross_entropy_2d = np.asmatrix(
+            [[-np.log(X_2d[i][label_2d[i][0]])] for i in range(X_2d.shape[0])],
+            dtype="float64")
+
+        X = X_2d.reshape(shape + [class_num])
+        label = label_2d.reshape(shape + [1])
+        cross_entropy = np.array(cross_entropy_2d).reshape(shape + [1])
+
+        self.inputs = {"X": X, "Label": label}
+        self.outputs = {"Y": cross_entropy}
+        self.attrs = {"soft_label": False}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Y", numeric_grad_delta=0.001)
+
+
+class TestCrossEntropyOp5(OpTest):
+    """Test high rank tensor cross-entropy with vectorized soft labels.
+    """
+
+    def setUp(self):
+        self.op_type = "cross_entropy"
+        shape = [4, 3]
+        ins_num = np.prod(np.array(shape))
+        class_num = 37
+
+        X_2d = randomize_probability(ins_num, class_num)
+        label_2d = np.random.uniform(0.1, 1.0,
+                                     [ins_num, class_num]).astype("float32")
+        label_2d /= label_2d.sum(axis=1, keepdims=True)
+        cross_entropy_2d = (-label_2d * np.log(X_2d)).sum(
+            axis=1, keepdims=True).astype("float32")
+
+        X = X_2d.reshape(shape + [class_num])
+        label = label_2d.reshape(shape + [class_num])
+        cross_entropy = np.array(cross_entropy_2d).reshape(shape + [1])
+
+        self.inputs = {"X": X, "Label": label}
+        self.outputs = {"Y": cross_entropy}
+        self.attrs = {"soft_label": True}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(
+            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
+
+
+class TestCrossEntropyOp6(OpTest):
+    """Test high rank tensor cross-entropy with vectorized one-hot representation of labels.
+    """
+
+    def setUp(self):
+        self.op_type = "cross_entropy"
+        shape = [4, 3, 2]
+        ins_num = np.prod(np.array(shape))
+        class_num = 17
+
+        X_2d = randomize_probability(ins_num, class_num)
+        label_index_2d = np.random.randint(
+            0, class_num, (ins_num), dtype="int32")
+        label_2d = np.zeros(X_2d.shape)
+        label_2d[np.arange(ins_num), label_index_2d] = 1
+
+        cross_entropy_2d = np.asmatrix(
+            [[-np.log(X_2d[i][label_index_2d[i]])]
+             for i in range(X_2d.shape[0])],
+            dtype="float32")
+
+        X = X_2d.reshape(shape + [class_num])
+        label = label_2d.reshape(shape + [class_num])
+        cross_entropy = np.array(cross_entropy_2d).reshape(shape + [1])
+
+        self.inputs = {"X": X, "Label": label.astype(np.float32)}
+        self.outputs = {"Y": cross_entropy}
+        self.attrs = {"soft_label": True}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(
+            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)
+
+
 if __name__ == "__main__":
     unittest.main()