Commit b24afd81 authored by W wanghaox

Rename sub_sequence_op to sequence_slice_op and update the code accordingly.

Parent f23d6cc4
......@@ -12,37 +12,39 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sub_sequence_op.h"
#include "paddle/operators/sequence_slice_op.h"
namespace paddle {
namespace operators {
class SubSequenceOp : public framework::OperatorWithKernel {
class SequenceSliceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SubSequenceOp should not be null.");
"Input(X) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Offset"),
"Input(Offset) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Length"),
"Input(Length) of SequenceSliceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SubSequenceOp should not be null.");
"Output(Out) of SequenceSliceOp should not be null.");
auto input_dims = ctx->GetInputDim("X");
auto offsets = ctx->Attrs().Get<std::vector<int>>("offset");
auto sizes = ctx->Attrs().Get<std::vector<int>>("size");
auto dim_0 = 0;
for (size_t i = 0; i < sizes.size(); ++i) {
dim_0 += sizes[i];
}
framework::DDim out_dims = input_dims;
out_dims[0] = dim_0;
ctx->SetOutputDim("Out", out_dims);
ctx->SetOutputDim("Out", input_dims);
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class SubSequenceGradOp : public framework::OperatorWithKernel {
class SequenceSliceGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -53,34 +55,50 @@ class SubSequenceGradOp : public framework::OperatorWithKernel {
"The gradient of X should not be null.");
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
}
protected:
framework::OpKernelType GetKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<framework::LoDTensor>("X")->type()),
ctx.device_context());
}
};
class SubSequenceOpMaker : public framework::OpProtoAndCheckerMaker {
class SequenceSliceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SubSequenceOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
SequenceSliceOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(LoDTensor), "
"the variable-length input of SubSequenceOp");
AddAttr<std::vector<int>>(
"offset",
"A list<int> that describes the offset of each sub-sequence item.");
AddAttr<std::vector<int>>(
"size",
"A list<int> that describes the size of each sub-sequence item.");
AddInput("X",
"(LoDTensor), "
"the input of SequenceSliceOp.");
AddInput("Offset",
"(Tensor), "
"A vector<int> to describes offset for sub sequence item.");
AddInput("Length",
"(Tensor), "
"A vector<int> to describes length for sub sequence item.");
AddOutput("Out",
"(Tensor), Variable-length output of "
"sequence_concat Op.");
"(LoDTensor), output of sequence slice Op.");
AddComment(R"DOC(
Sub Sequence operator
The operator crops a subsequence from the given sequence, using a per-sequence start offset and subsequence size.
Sequence slice operator
The operator crops a subsequence from the given sequence, using a per-sequence start offset and subsequence length.
It only supports sequences (LoDTensors with a LoD level of 1).
- Case:
LoD(X) = {{0, 3, 6, 10}}; Dims(X) = (10, 3, 2)
offset = (0, 1, 1); size = (2, 1, 2)
LoD(Out) = {{0, 2, 3, 5}}; Dims(Out) = (5, 3, 2)
NOTE: The input, offset and size arrays must have the same length. Offsets start from 0.
X = [[a1, a2;
b1, b2;
c1, c2]
[d1, d2;
e1, e2]]
LoD(X) = {{0, 3, 5}}; Dims(X) = (5, 1, 2)
Offset = (0, 1); Length = (2, 1)
Out = [[a1, a2;
b1, b2]
[e1, e2]]
LoD(Out) = {{0, 2, 3}}; Dims(Out) = (3, 1, 2)
NOTE: The input, offset and length arrays must have the same number of entries (one per sequence). Offsets start from 0.
)DOC");
}
};
......@@ -89,11 +107,11 @@ NOTE: The length of the input, offset and size should be the same. The offset st
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP(sub_sequence, ops::SubSequenceOp, ops::SubSequenceOpMaker,
sub_sequence_grad, ops::SubSequenceGradOp);
REGISTER_OP(sequence_slice, ops::SequenceSliceOp, ops::SequenceSliceOpMaker,
sequence_slice_grad, ops::SequenceSliceGradOp);
REGISTER_OP_CPU_KERNEL(
sub_sequence,
ops::SubSequenceOpKernel<paddle::platform::CPUPlace, float>);
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
sub_sequence_grad,
ops::SubSequenceGradOpKernel<paddle::platform::CPUPlace, float>);
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUPlace, float>);
......@@ -12,14 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define EIGEN_USE_GPU
#include "paddle/operators/sub_sequence_op.h"
#include "paddle/operators/sequence_slice_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
sub_sequence,
ops::SubSequenceOpKernel<paddle::platform::GPUPlace, float>);
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(
sub_sequence_grad,
ops::SubSequenceGradOpKernel<paddle::platform::GPUPlace, float>);
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::GPUPlace, float>);
......@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
#include "paddle/operators/strided_memcpy.h"
namespace paddle {
......@@ -25,109 +25,124 @@ using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;
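// SequenceSliceLoD (below) builds the output LoD by accumulating the
// per-sequence slice lengths. Example (from the doc comment above): an input
// with lod = {{0, 3, 5}} and lengths (2, 1) yields out_lod = {{0, 2, 3}}.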
template <typename T>
LoD subsequenceLoD(const T* in, const std::vector<int> offsets,
const std::vector<int> sizes) {
auto out_lod = in->lod();
LoD SequenceSliceLoD(const T& in, const int64_t* offset_data,
const int64_t* length_data) {
auto out_lod = in.lod();
size_t lod_offset = 0;
auto n = in->lod()[0].size() - 1;
auto n = in.lod()[0].size() - 1;
out_lod[0][0] = 0;
for (size_t i = 0; i < n; ++i) {
lod_offset += sizes[i];
lod_offset += length_data[i];
out_lod[0][i+1] = lod_offset;
}
return out_lod;
}
template <typename Place, typename T>
class SubSequenceOpKernel : public framework::OpKernel<T> {
class SequenceSliceOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
std::vector<int> offsets = ctx.Attr<std::vector<int>>("offset");
std::vector<int> sizes = ctx.Attr<std::vector<int>>("size");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out = ctx.Output<LoDTensor>("Out");
auto offset_len = offsets.size();
auto size_len = sizes.size();
const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
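// Offset and Length may reside on the GPU; copy them to host memory so the
// kernel can read the index values directly.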
if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor offset_cpu;
offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();
framework::Tensor length_cpu;
length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}
auto lod = in->lod();
auto n = lod[0].size() - 1;
PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
PADDLE_ENFORCE_EQ(n, offset_len,
"The length of input and offset should be the same.");
PADDLE_ENFORCE_EQ(n, size_len,
"The length of input and size should be the same.");
PADDLE_ENFORCE_EQ(offset->dims().size(), 1UL,
"Offset must be a 1-D tensor (one entry per sequence).");
PADDLE_ENFORCE_EQ(length->dims().size(), 1UL,
"Length must be a 1-D tensor (one entry per sequence).");
PADDLE_ENFORCE_EQ(
n, length->dims()[0],
"The size of the input sequence and the length array should be the same.");
PADDLE_ENFORCE_EQ(
n, offset->dims()[0],
"The size of the input sequence and the offset array should be the same.");
for (size_t i = 0; i < n; ++i) {
auto offset = offsets[i];
auto size = sizes[i];
PADDLE_ENFORCE_LT(lod[0][i] + offset + size, lod[0][i + 1],
"The target tensor's length overflows.");
PADDLE_ENFORCE_LT(0, offset_data[i],
"The offset must be greater than zero.");
PADDLE_ENFORCE_LT(0, length_data[i],
"The length must be greater than zero.");
PADDLE_ENFORCE_LT(lod[0][i] + offset_data[i] + length_data[i],
lod[0][i + 1], "The target tensor's length overflows.");
}
out->mutable_data<T>(ctx.GetPlace());
auto out_lod = subsequenceLoD(in, offsets, sizes);
auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
out->set_lod(out_lod);
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), out, static_cast<T>(0));
auto in_stride = framework::stride(in->dims());
auto out_stride = framework::stride(out->dims());
size_t out_offset = 0;
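// Copy each selected sub-sequence into the output buffer, advancing the
// write offset by the number of rows copied times the row stride.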
for (size_t i = 0; i < n; ++i) {
auto offset = offsets[i];
auto size = sizes[i];
Tensor in_t = in->Slice(static_cast<int>(lod[0][i] + offset),
static_cast<int>(lod[0][i] + offset + size));
Tensor in_t =
in->Slice(static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] +
length_data[i]));
StridedMemcpy<T>(ctx.device_context(), in_t.data<T>(),
in_stride, in_t.dims(), out_stride,
out->data<T>() + out_offset);
out_offset += size * in_stride[0];
out_offset += length_data[i] * in_stride[0];
}
}
};
template <typename Place, typename T>
class SubSequenceGradOpKernel : public framework::OpKernel<T> {
class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<LoDTensor>("X");
std::vector<int> offsets = ctx.Attr<std::vector<int>>("offset");
std::vector<int> sizes = ctx.Attr<std::vector<int>>("size");
auto* offset = ctx.Input<Tensor>("Offset");
auto* length = ctx.Input<Tensor>("Length");
auto* out_grad =
ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
auto* x_grad =
ctx.Output<framework::LoDTensor>(framework::GradVarName("X"));
auto offset_len = offsets.size();
auto size_len = sizes.size();
const int64_t* offset_data = offset->data<int64_t>();
const int64_t* length_data = length->data<int64_t>();
auto lod = in->lod();
auto n = lod[0].size() - 1;
// check input data format
PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
PADDLE_ENFORCE_EQ(n, offset_len,
"The length of input and offset should be the same.");
PADDLE_ENFORCE_EQ(n, size_len,
"The length of input and size should be the same.");
for (size_t i = 0; i < n; ++i) {
auto offset = offsets[i];
auto size = sizes[i];
PADDLE_ENFORCE_LT(lod[0][i] + offset + size, lod[0][i + 1],
"The target tensor's length overflows.");
}
auto out_lod = subsequenceLoD(in, offsets, sizes);
if (platform::is_gpu_place(ctx.GetPlace())) {
framework::Tensor offset_cpu;
offset_cpu.mutable_data<int64_t>(offset->dims(), platform::CPUPlace());
offset_cpu.CopyFrom(*offset, platform::CPUPlace(), ctx.device_context());
offset_data = offset_cpu.data<int64_t>();
framework::Tensor length_cpu;
length_cpu.mutable_data<int64_t>(length->dims(), platform::CPUPlace());
length_cpu.CopyFrom(*length, platform::CPUPlace(), ctx.device_context());
length_data = length_cpu.data<int64_t>();
}
auto lod = in->lod();
auto out_lod = SequenceSliceLoD(*in, offset_data, length_data);
x_grad->set_lod(lod);
x_grad->mutable_data<T>(ctx.GetPlace());
auto temp = framework::EigenVector<T>::Flatten(*x_grad);
temp.device(ctx.GetEigenDevice<Place>()) = temp.constant(static_cast<T>(0));
math::SetConstant<Place, T> set_zero;
set_zero(ctx.device_context(), x_grad, static_cast<T>(0));
auto out_grad_stride = framework::stride(out_grad->dims());
......@@ -139,11 +154,9 @@ class SubSequenceGradOpKernel : public framework::OpKernel<T> {
auto x_grad_stride = framework::stride(x_grad->dims());
auto offset = offsets[i];
auto size = sizes[i];
Tensor x_grad_t = x_grad->Slice(static_cast<int>(lod[0][i] + offset),
static_cast<int>(lod[0][i] + offset + size));
Tensor x_grad_t = x_grad->Slice(
static_cast<int>(lod[0][i] + offset_data[i]),
static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
StridedMemcpy<T>(ctx.device_context(), out_grad_t.data<T>(),
out_grad_stride, out_grad_t.dims(), x_grad_stride,
......
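For the backward pass, the kernel zero-fills x_grad and copies each out_grad slice back to the rows it came from. A minimal numpy model of that scatter follows (the sequence_slice_grad helper is hypothetical, under the same assumptions as the sketch above, not the Paddle kernel itself).

import numpy as np

def sequence_slice_grad(x_shape, lod, offset, length, out_grad):
    # Zero-fill the gradient of X, then scatter each out_grad slice back to
    # the rows it was sliced from.
    x_grad = np.zeros(x_shape, dtype=out_grad.dtype)
    out_row = 0
    for i in range(len(offset)):
        start = lod[i] + offset[i]
        x_grad[start:start + length[i]] = out_grad[out_row:out_row + length[i]]
        out_row += length[i]
    return x_grad

out_grad = np.ones((3, 2), dtype="float32")  # gradient w.r.t. the sliced rows
x_grad = sequence_slice_grad((5, 2), [0, 3, 5], [0, 1], [2, 1], out_grad)
print(x_grad[:, 0])  # [1. 1. 0. 0. 1.] -- rows a, b and e receive gradient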
......@@ -3,31 +3,29 @@ import numpy as np
import sys
from op_test import OpTest
class TestSubSequenceOp(OpTest):
class TestSequenceSliceOp(OpTest):
def set_data(self):
# only support one-level LoD
x = np.random.random((100, 3, 2)).astype('float32')
lod = [[0, 20, 40, 60, 80, 100]]
offsets = np.array([1, 2, 3, 4, 5]).flatten()
sizes = np.array([10, 8, 6, 4, 2]).flatten()
offset = np.array([1, 2, 3, 4, 5]).flatten().astype("int64")
length = np.array([10, 8, 6, 4, 2]).flatten().astype("int64")
self.inputs = {'X': (x, lod)}
self.attrs = {'offset': offsets, 'size': sizes}
outs = []
self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
outs = np.zeros((100, 3, 2)).astype('float32')
out_lod = [[0]]
out_lod_offset = 0
for i in range(len(offsets)):
sub_x = x[lod[0][i] + offsets[i]: lod[0][i] + offsets[i] + sizes[i], :]
outs.append(sub_x)
for i in range(len(offset)):
sub_x = x[lod[0][i] + offset[i]: lod[0][i] + offset[i] + length[i], :]
out_lod_offset = out_lod_offset + len(sub_x)
outs[out_lod[0][i]: out_lod_offset, :] = sub_x
out_lod[0].append(out_lod_offset)
outs = np.concatenate(outs, axis=0)
self.outputs = {'Out': outs}
self.outputs = {'Out': (outs, out_lod)}
def setUp(self):
self.op_type = "sub_sequence"
self.op_type = "sequence_slice"
self.set_data()
def test_check_output(self):
......