Unverified commit f2ae7e32, authored by Yibing Liu, committed by GitHub

Support seq len equal to 0 in sequence ops (#16964)

* Merge conflicts with 1.4

* Resolve conflicts

* Fix some comments

test=release/1.4

* Fix py3 error

test=release/1.4
Parent 5ab96d35
@@ -158,7 +158,7 @@ bool CheckLoD(const LoD &in, int tensor_height) {
     if (level.size() < 2) return false;
     // check: the first offset(the begin offset) of each level should be 0.
     if (level.front() != 0) return false;
-    // check: all the offsets in a level should be ascending(allow same items)
+    // check: all the offsets in a level should be non-descending
     if (!std::is_sorted(level.begin(), level.end())) {
       return false;
     }
@@ -182,7 +182,7 @@ bool CheckAbsLoD(const LoD &in, int tensor_height) {
   if (in.empty()) return true;
   for (const auto &level : in) {
     // check: all the offsets in a level should be ascending(no same items
-    // allows).
+    // allowed).
     if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
           if (a < b) return true;
           return false;
...
@@ -79,7 +79,7 @@ bool operator==(const LoD& a, const LoD& b);
  *
  * It will check two things:
  *
- * 1. all the offsets in a level should be ascending(no same items allows).
+ * 1. all the offsets in a level should be non-descending.
  * 2. there should be more than 2 offsets existing in each level.
  * 3. the higher level's last offset should equals the lower level's size-1.
  * 4. the first offset(the begin offset) of each level should be 0.
@@ -95,7 +95,7 @@ bool CheckLoD(const LoD& in, int tensor_height = -1);
  * - Empty lod is treated as valid.
  *
  * It will check two things:
- * 1. all the offsets in a level should be ascending(no same items allows)
+ * 1. all the offsets in a level should be ascending(no same items allowed).
  * 2. there should be more than 2 offsets existing in each level.
  * 3. the first offset of each level should be 0, and the last should be the
  *    same(the height of underlying tensor) or `tensor_height` if
...
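The relaxed rule above is the heart of the change: a LoD level no longer has to be strictly ascending, so two equal neighboring offsets now describe a legal zero-length sequence. A minimal Python sketch of the relaxed check (illustrative only; check_lod_level is not a Paddle API):

def check_lod_level(level, tensor_height=None):
    """Validate one offset-based LoD level under the relaxed rule.

    [0, 2, 2, 5] describes three sequences of lengths 2, 0 and 3;
    equal neighboring offsets mark a zero-length sequence.
    """
    if len(level) < 2:        # need at least a begin and an end offset
        return False
    if level[0] != 0:         # the begin offset must be 0
        return False
    # non-descending (before this commit: strictly ascending)
    if any(a > b for a, b in zip(level, level[1:])):
        return False
    # optionally, the last offset must match the tensor height
    return tensor_height is None or level[-1] == tensor_height

assert check_lod_level([0, 2, 2, 5], tensor_height=5)  # len-0 seq is now valid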
@@ -46,6 +46,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
     math::SetConstant<DeviceContext, int64_t>()(
         ctx.template device_context<DeviceContext>(), decoded_path, 0);
     for (size_t i = 0; i < seq_num; ++i) {
+      if (lod[level][i] == lod[level][i + 1]) continue;
       int start_pos = static_cast<int>(lod[level][i]);
       int end_pos = static_cast<int>(lod[level][i + 1]);
       Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
...
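Most kernel changes in this commit repeat the pattern of the hunk above: walk consecutive offset pairs and skip any sequence whose start equals its end. A rough Python equivalent of that guard (hypothetical helper, for illustration only):

import numpy as np

def for_each_nonempty_seq(data, offsets, fn):
    """Apply fn to every non-empty sequence slice described by offsets."""
    for i in range(len(offsets) - 1):
        start, end = offsets[i], offsets[i + 1]
        if start == end:  # zero-length sequence: nothing to process
            continue
        fn(data[start:end])

for_each_nonempty_seq(np.arange(5), [0, 2, 2, 5], print)  # skips the middle one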
@@ -104,6 +104,8 @@ class ContextProjectFunctor {
 
     sequence_width = in.dims()[1];
     for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+      if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
       input_row_begin = (context_start > 0)
                             ? static_cast<int>(lod_level_0[i]) + context_start
                             : static_cast<int>(lod_level_0[i]);
@@ -134,6 +136,8 @@ class ContextProjectFunctor {
     if (padding_trainable) {
       PADDLE_ENFORCE_NOT_NULL(padding_data);
       for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
         Tensor out_t = col->Slice(static_cast<int>(lod_level_0[i]),
                                   static_cast<int>(lod_level_0[i + 1]));
@@ -216,6 +220,8 @@ class ContextProjectGradFunctor {
     if (input_grad) {
       for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+        if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
         input_row_begin = (context_start > 0)
                               ? static_cast<int>(lod_level_0[i]) + context_start
                               : static_cast<int>(lod_level_0[i]);
@@ -248,6 +254,8 @@ class ContextProjectGradFunctor {
     if (pad_grad) {
       if (padding_trainable) {
         for (int i = 0; i < static_cast<int>(lod_level_0.size()) - 1; ++i) {
+          if (lod_level_0[i] == lod_level_0[i + 1]) continue;
+
           Tensor out_t = col->Slice(static_cast<int>(lod_level_0[i]),
                                     static_cast<int>(lod_level_0[i + 1]));
...
@@ -197,9 +197,9 @@ void gpu_lstm_forward(const platform::DeviceContext& context, Op op,
     threads = dim3(frame_per_block, 1);
     grid = dim3(frame_blocks, 1);
   } else {
-    /* frame_per_block = 32 batch_per_block = 32 */
-    threads = dim3(32, 32);
-    grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 32 - 1) / 32);
+    /* frame_per_block = 32 batch_per_block = 16 */
+    threads = dim3(32, 16);
+    grid = dim3((frame_size + 32 - 1) / 32, (batch_size + 16 - 1) / 16);
   }
   auto stream =
...
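The LSTM launch change above is independent of zero-length sequences: it shrinks the batch dimension of each CUDA block from 32 to 16 threads and sizes the grid with the usual ceiling division. The grid arithmetic, for reference (a sketch, not Paddle code):

def ceil_div(n, block):
    """(n + block - 1) // block: number of blocks needed to cover n items."""
    return (n + block - 1) // block

frame_size, batch_size = 100, 33
grid = (ceil_div(frame_size, 32), ceil_div(batch_size, 16))
assert grid == (4, 3)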
@@ -34,7 +34,9 @@ inline framework::LoD ConcatLoD(const Container &xs,
     for (size_t j = 0; j < xs.size(); ++j) {
       auto &x_lod = xs[j].get().lod()[0];
       const framework::Tensor &tensor = xs[j].get();
-      xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i]));
+      if (x_lod[i - 1] < x_lod[i]) {
+        xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i]));
+      }
       sum += x_lod[i];
     }
     result[i] = sum;
@@ -97,6 +99,8 @@ class SeqConcatGradKernel : public framework::OpKernel<T> {
         const framework::LoDTensor *x = xs[j];
         framework::LoDTensor *dx = dxs[j];
         auto &x_lod = x->lod()[0];
+        if (x_lod[i - 1] == x_lod[i]) continue;
+
         sliced_x.emplace_back(x->Slice(x_lod[i - 1], x_lod[i]));
         if (dx != nullptr) {
           sliced_dx.emplace_back(dx->Slice(x_lod[i - 1], x_lod[i]));
...
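ConcatLoD interleaves the i-th sequence of every input; with the guard added above, an empty sequence contributes no slice, but its offset still feeds the summed output LoD. A NumPy model of the forward path under that reading (illustrative names; offset-based LoD levels; assumes at least one non-empty slice overall):

import numpy as np

def seq_concat(xs, lods):
    """Concatenate the i-th sequence of every input, in order."""
    n_seq = len(lods[0]) - 1
    pieces, out_lod = [], [0]
    for i in range(1, n_seq + 1):
        for x, lod in zip(xs, lods):
            if lod[i - 1] < lod[i]:  # skip zero-length sequences
                pieces.append(x[lod[i - 1]:lod[i]])
        out_lod.append(sum(lod[i] for lod in lods))
    return np.concatenate(pieces), out_lod

x1 = np.arange(10).reshape(10, 1)
x2 = np.arange(20).reshape(20, 1)
# lengths [10, 0] and [12, 8], as in TestSequenceConcatCase2 further below
out, lod = seq_concat([x1, x2], [[0, 10, 10], [0, 12, 20]])
assert lod == [0, 22, 30] and out.shape == (30, 1)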
@@ -47,8 +47,10 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
     out->set_lod(in->lod());
     auto out_data = out->mutable_data<T>(context.GetPlace());
     for (size_t i = 0; i < lod0.size() - 1; ++i) {
+      if (lod0[i] == lod0[i + 1]) continue;
+
       int start = lod0[i];
       int end = lod0[i + 1];
       int copy_size = win_size < end - start + 1 ? win_size : end - start + 1;
       int mid = end + 1 - copy_size;
       int pad_num = win_size - copy_size;
...
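sequence_enumerate emits one win_size-gram per position of each sequence, padding past the sequence end with pad_value, so a zero-length sequence must emit no rows at all. A hypothetical reference implementation in the spirit of the sequence_enumerate test helper used further below:

import numpy as np

def sequence_enumerate(x, lod, win_size, pad_value):
    """Enumerate win_size-grams of each sequence, padding past the end."""
    x = np.asarray(x).reshape(-1)
    out, offset = [], 0
    for seq_len in lod[0]:  # length-based LoD, as in the Python tests
        if seq_len == 0:    # mirrors the kernel's new guard
            continue
        for pos in range(offset, offset + seq_len):
            out.append([x[j] if j < offset + seq_len else pad_value
                        for j in range(pos, pos + win_size)])
        offset += seq_len
    return np.array(out)

print(sequence_enumerate([1, 2, 3], [[3]], 2, 0))  # [[1 2] [2 3] [3 0]]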
@@ -160,6 +160,7 @@ struct SequenceExpandGradFunctor<platform::CPUDeviceContext, T> {
       int x_start = x_lod[i - 1];
       int x_end = x_lod[i];
       int x_seq_len = x_end - x_start;
+      if (x_seq_len == 0) continue;
       auto dx_sub = dx->Slice(x_start, x_end);
       dx_sub.Resize(flatten_to_1d(dx_sub.dims()));
       int dout_end = dout_offset + repeat_num * x_seq_len;
...
@@ -76,9 +76,9 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < n; ++i) {
       PADDLE_ENFORCE_LE(0, offset_data[i],
-                        "The offset[%d] must greater than zero.", i);
-      PADDLE_ENFORCE_LT(0, length_data[i],
-                        "The length[%d] must greater than zero.", i);
+                        "The offset[%d] must be nonnegative.", i);
+      PADDLE_ENFORCE_LE(0, length_data[i],
+                        "The length[%d] must be nonnegative.", i);
       PADDLE_ENFORCE_LE(lod[0][i] + offset_data[i] + length_data[i],
                         lod[0][i + 1], "The target tensor's length overflow.");
     }
@@ -95,6 +95,7 @@ class SequenceSliceOpKernel : public framework::OpKernel<T> {
     size_t out_offset = 0;
     for (size_t i = 0; i < n; ++i) {
+      if (length_data[i] == 0) continue;
       Tensor in_t = in->Slice(
           static_cast<int>(lod[0][i] + offset_data[i]),
           static_cast<int>(lod[0][i] + offset_data[i] + length_data[i]));
@@ -144,6 +145,7 @@ class SequenceSliceGradOpKernel : public framework::OpKernel<T> {
                        static_cast<T>(0));
       for (size_t i = 0; i < out_lod[0].size() - 1; ++i) {
+        if (length_data[i] == 0) continue;
         Tensor out_grad_t =
             out_grad->Slice(static_cast<int>(out_lod[0][i]),
                             static_cast<int>(out_lod[0][i + 1]));
...
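With PADDLE_ENFORCE_LE in place, a zero length is accepted and simply produces an empty output sequence. A small NumPy model of sequence_slice under the relaxed checks (illustrative; length-based LoD in and out):

import numpy as np

def sequence_slice(x, lod0, offsets, lengths):
    """Take length_i rows starting at offset_i from each sequence."""
    starts = np.cumsum([0] + list(lod0))  # length-based -> offset-based
    pieces = []
    for i, (off, length) in enumerate(zip(offsets, lengths)):
        assert 0 <= off and 0 <= length
        assert starts[i] + off + length <= starts[i + 1]
        if length == 0:  # empty output sequence
            continue
        pieces.append(x[starts[i] + off:starts[i] + off + length])
    out = np.concatenate(pieces) if pieces else np.empty((0,) + x.shape[1:])
    return out, [list(lengths)]

x = np.arange(10).reshape(10, 1)
out, out_lod = sequence_slice(x, [4, 6], [1, 0], [2, 0])
assert out.ravel().tolist() == [1, 2] and out_lod == [[2, 0]]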
@@ -128,12 +128,15 @@ class TestCRFDecodingOp2(OpTest):
     ground truth being given.
     """
 
+    def init_lod(self):
+        self.lod = [[1, 2, 3, 4]]
+
     def setUp(self):
         self.op_type = "crf_decoding"
         TAG_NUM = 5
 
-        lod = [[1, 2, 3, 4]]
-        total_len = sum(lod[-1])
+        self.init_lod()
+        total_len = sum(self.lod[-1])
         transition = np.repeat(
             np.arange(
                 TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
@@ -152,9 +155,9 @@ class TestCRFDecodingOp2(OpTest):
         expected_output = (labels == predicted_labels).astype("int64")
 
         self.inputs = {
-            "Emission": (emission, lod),
+            "Emission": (emission, self.lod),
             "Transition": transition,
-            "Label": (labels, lod)
+            "Label": (labels, self.lod)
         }
 
         self.outputs = {"ViterbiPath": expected_output}
@@ -163,5 +166,15 @@ class TestCRFDecodingOp2(OpTest):
         self.check_output()
 
 
+class TestCRFDecodingOp3(TestCRFDecodingOp2):
+    def init_lod(self):
+        self.lod = [[1, 0, 0, 4]]
+
+
+class TestCRFDecodingOp4(TestCRFDecodingOp2):
+    def init_lod(self):
+        self.lod = [[0, 2, 3, 0]]
+
+
 if __name__ == "__main__":
     unittest.main()
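The test-side changes all follow one template-method idiom: the base case moves its LoD into an overridable hook, and each zero-length variant subclasses it to override only that hook. A minimal sketch of the idiom with hypothetical class names:

import unittest

class BaseCase(unittest.TestCase):
    def init_lod(self):
        self.lod = [[1, 2, 3, 4]]  # all sequences non-empty

    def test_lengths(self):
        self.init_lod()
        # every variant must still describe the same total number of rows
        self.assertEqual(sum(self.lod[0]), 10)

class Len0Case(BaseCase):
    def init_lod(self):
        self.lod = [[1, 0, 0, 9]]  # zero-length sequences allowed

if __name__ == "__main__":
    unittest.main()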
@@ -58,10 +58,10 @@ class TestEditDistanceOp(OpTest):
         x2 = np.array([[12, 4, 7, 8]]).astype("int64")
         x1 = np.transpose(x1)
         x2 = np.transpose(x2)
-        x1_lod = [1, 4]
-        x2_lod = [3, 1]
+        self.x1_lod = [1, 4]
+        self.x2_lod = [3, 1]
 
-        num_strs = len(x1_lod)
+        num_strs = len(self.x1_lod)
         distance = np.zeros((num_strs, 1)).astype("float32")
         sequence_num = np.array(2).astype("int64")
@@ -69,23 +69,26 @@
         x2_offset = 0
         for i in range(0, num_strs):
             distance[i] = Levenshtein(
-                hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
-                ref=x2[x2_offset:(x2_offset + x2_lod[i])])
-            x1_offset += x1_lod[i]
-            x2_offset += x2_lod[i]
+                hyp=x1[x1_offset:(x1_offset + self.x1_lod[i])],
+                ref=x2[x2_offset:(x2_offset + self.x2_lod[i])])
+            x1_offset += self.x1_lod[i]
+            x2_offset += self.x2_lod[i]
             if normalized is True:
-                len_ref = x2_lod[i]
+                len_ref = self.x2_lod[i]
                 distance[i] = distance[i] / len_ref
 
         self.attrs = {'normalized': normalized}
-        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.inputs = {'Hyps': (x1, [self.x1_lod]), 'Refs': (x2, [self.x2_lod])}
         self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
 
     def test_check_output(self):
         self.check_output()
 
 
-class TestEditDistanceOpNormalized(OpTest):
+class TestEditDistanceOpNormalizedCase0(OpTest):
+    def reset_config(self):
+        pass
+
     def setUp(self):
         self.op_type = "edit_distance"
         normalized = True
@@ -93,10 +96,11 @@ class TestEditDistanceOpNormalized(OpTest):
         x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64")
         x1 = np.transpose(x1)
         x2 = np.transpose(x2)
-        x1_lod = [1, 2, 3]
-        x2_lod = [2, 1, 2]
+        self.x1_lod = [3, 0, 3]
+        self.x2_lod = [2, 1, 2]
+        self.reset_config()
 
-        num_strs = len(x1_lod)
+        num_strs = len(self.x1_lod)
         distance = np.zeros((num_strs, 1)).astype("float32")
         sequence_num = np.array(3).astype("int64")
@@ -104,21 +108,33 @@
         x2_offset = 0
         for i in range(0, num_strs):
             distance[i] = Levenshtein(
-                hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
-                ref=x2[x2_offset:(x2_offset + x2_lod[i])])
-            x1_offset += x1_lod[i]
-            x2_offset += x2_lod[i]
+                hyp=x1[x1_offset:(x1_offset + self.x1_lod[i])],
+                ref=x2[x2_offset:(x2_offset + self.x2_lod[i])])
+            x1_offset += self.x1_lod[i]
+            x2_offset += self.x2_lod[i]
             if normalized is True:
-                len_ref = x2_lod[i]
+                len_ref = self.x2_lod[i]
                 distance[i] = distance[i] / len_ref
 
         self.attrs = {'normalized': normalized}
-        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.inputs = {'Hyps': (x1, [self.x1_lod]), 'Refs': (x2, [self.x2_lod])}
        self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
 
     def test_check_output(self):
         self.check_output()
 
 
+class TestEditDistanceOpNormalizedCase1(TestEditDistanceOpNormalizedCase0):
+    def reset_config(self):
+        self.x1_lod = [0, 6, 0]
+        self.x2_lod = [2, 1, 2]
+
+
+class TestEditDistanceOpNormalizedCase2(TestEditDistanceOpNormalizedCase0):
+    def reset_config(self):
+        self.x1_lod = [0, 0, 6]
+        self.x2_lod = [2, 2, 1]
+
+
 if __name__ == '__main__':
     unittest.main()
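With empty hypotheses or references allowed, the edit distance degenerates to the length of the other string (all insertions or all deletions). A compact DP sketch of the Levenshtein helper these tests call (an assumed implementation; the real one lives in the test module):

import numpy as np

def Levenshtein(hyp, ref):
    """Classic O(m*n) edit-distance DP; handles empty inputs naturally."""
    m, n = len(hyp), len(ref)
    dist = np.zeros((m + 1, n + 1), dtype="float32")
    dist[:, 0] = np.arange(m + 1)  # delete everything from hyp
    dist[0, :] = np.arange(n + 1)  # insert everything from ref
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if hyp[i - 1] == ref[j - 1] else 1
            dist[i][j] = min(dist[i - 1][j] + 1,         # deletion
                             dist[i][j - 1] + 1,         # insertion
                             dist[i - 1][j - 1] + cost)  # substitution
    return dist[m][n]

assert Levenshtein([], [7, 8]) == 2.0  # empty hyp: distance = len(ref)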
@@ -82,9 +82,9 @@ def gru(
     hidden = np.zeros((T, D), dtype=dtype)
 
     idx_in_seq_list, sorted_seqs = _seq_to_batch(lod, is_reverse)
-    h_p = h0[sorted_seqs]
+    h_p = h0[[seq for seq in sorted_seqs if lod[0][seq] > 0]]
+
     max_seq_len = len(idx_in_seq_list)
-    assert len(idx_in_seq_list[0]) == N
     end_idx = 0
     for batch_idx in range(max_seq_len):
         x = input[idx_in_seq_list[batch_idx]]
@@ -119,7 +119,6 @@ class TestGRUOp(OpTest):
         T = sum(self.lod[0])
         N = len(self.lod[0])
         input = np.random.rand(T, 3 * self.D).astype(self.dtype)
-
         weight = np.random.rand(self.D, 3 * self.D).astype(self.dtype)
         bias = np.random.rand(
@@ -173,6 +172,13 @@ class TestGRUOp2(TestGRUOp):
         self.dtype = 'float32'
 
 
+class TestGRUOp2Len0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[2, 0, 4]]
+        self.dtype = 'float32'
+
+
 class TestGRUOp2OriginMode(TestGRUOp):
     def set_confs(self):
         self.D = 19
@@ -180,6 +186,22 @@ class TestGRUOp2OriginMode(TestGRUOp):
         self.origin_mode = True
 
 
+class TestGRUOp2OriginModeLen0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[0, 3, 4]]
+        self.dtype = 'float32'
+        self.origin_mode = True
+
+
+class TestGRUOp2OriginModeLastLen0(TestGRUOp):
+    def set_confs(self):
+        self.D = 19
+        self.lod = [[0, 3, 0]]
+        self.dtype = 'float32'
+        self.origin_mode = True
+
+
 class TestGRUOpNoInitial(TestGRUOp):
     def set_confs(self):
         self.with_h0 = False
...
@@ -89,7 +89,8 @@ class LinearChainCrfForward(object):
         for i in range(self.seq_num):
             start = self.seq_start_positions[i]
             end = self.seq_start_positions[i + 1]
+            if start >= end:
+                continue
             self.log_likelihood[i] = self._forward_a_sequence(
                 self.x[start:end, :], self.x_row_max[start:end, :],
                 self.x_exps[start:end, :], self.labels[start:end, :],
@@ -110,7 +111,7 @@ class TestLinearChainCrfOp(OpTest):
         lod = [[]]
         seq_start_pos = [0]
         for i in range(SEQ_NUM):
-            lod[-1].append(random.randint(1, MAX_SEQ_LEN))
+            lod[-1].append(random.randint(0, MAX_SEQ_LEN))
            seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1])
         emission = np.random.uniform(
             -1, 1, [seq_start_pos[-1], TAG_NUM]).astype("float64")
...
@@ -127,8 +127,11 @@ def lstm(
 
 
 class TestLstmOp(OpTest):
-    def set_argument(self):
+    def set_lod(self):
         self.lod = [[2, 3, 2]]
+
+    def set_argument(self):
+        self.set_lod()
         self.D = 16
 
         self.act_gate = 'sigmoid'
@@ -142,7 +145,6 @@ class TestLstmOp(OpTest):
     def setUp(self):
         self.set_argument()
         self.op_type = 'lstm'
-
         T = sum(self.lod[0])
         N = len(self.lod[0])
@@ -198,6 +200,21 @@ class TestLstmOp(OpTest):
             ['Input', 'Weight', 'Bias'], ['Hidden'], max_relative_error=5e-4)
 
 
+class TestLstmOpCase1(TestLstmOp):
+    def set_lod(self):
+        self.lod = [[0, 3, 2]]
+
+
+class TestLstmOpCase2(TestLstmOp):
+    def set_lod(self):
+        self.lod = [[0, 3, 0]]
+
+
+class TestLstmOpCase3(TestLstmOp):
+    def set_lod(self):
+        self.lod = [[2, 0, 4]]
+
+
 # class TestLstmOpHasInitial(TestLstmOp):
 #     def set_argument(self):
 #         self.lod = [[2, 3, 2]]
...
@@ -305,5 +305,15 @@ class TestLstmpOpLinearProjection(TestLstmpOp):
         self.act_proj = 'identity'
 
 
+class TestLstmpOpLen0Case1(TestLstmpOp):
+    def reset_argument(self):
+        self.lod = [[0, 4, 0]]
+
+
+class TestLstmpOpLen0Case2(TestLstmpOp):
+    def reset_argument(self):
+        self.lod = [[2, 0, 3]]
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -204,7 +204,24 @@ class TestSeqProjectCase1(TestSeqProject):
         self.output_represention = 8  # output feature size
 
 
-class TestSeqProjectCase2(TestSeqProject):
+class TestSeqProjectCase2Len0(TestSeqProject):
+    def init_test_case(self):
+        self.input_row = 11
+        self.context_start = -1
+        self.context_length = 3
+        self.padding_trainable = True
+        self.context_stride = 1
+
+        self.input_size = [self.input_row, 23]
+        offset_lod = [[0, 0, 4, 5, 5, 8, self.input_row, self.input_row]]
+        self.lod = [[]]
+        # convert from offset-based lod to length-based lod
+        for i in range(len(offset_lod[0]) - 1):
+            self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
+        self.output_represention = 8  # output feature size
+
+
+class TestSeqProjectCase3(TestSeqProject):
     def init_test_case(self):
         self.input_row = 25
         self.context_start = 2
...
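The new seq_conv case builds its lengths from an offset-based LoD; the two encodings are interchangeable, with zero-length sequences showing up as repeated offsets on one side and zeros on the other. A round-trip sketch (helper names are illustrative):

def offset_to_length(offsets):
    """[0, 0, 4, 5, 5, 8] -> [0, 4, 1, 0, 3]"""
    return [offsets[i + 1] - offsets[i] for i in range(len(offsets) - 1)]

def length_to_offset(lengths):
    """[0, 4, 1, 0, 3] -> [0, 0, 4, 5, 5, 8]"""
    offsets = [0]
    for n in lengths:
        offsets.append(offsets[-1] + n)
    return offsets

assert length_to_offset(offset_to_length([0, 0, 4, 5, 5, 8])) == [0, 0, 4, 5, 5, 8]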
@@ -20,19 +20,24 @@ from op_test import OpTest
 
 
 class TestSequenceConcat(OpTest):
+    def setLoD(self):
+        self.lod1 = [7, 3]
+        self.lod2 = [12, 8]
+        self.out_lod = [19, 11]
+
     def setUp(self):
         x1 = np.random.random(size=(10, 80))
-        lod1 = [7, 3]
         x2 = np.random.random(size=(20, 80))
-        lod2 = [12, 8]
+        self.setLoD()
 
-        out = np.concatenate((x1[0:lod1[0]], x2[0:lod2[0]], x1[lod1[0]:],
-                              x2[lod2[0]:]))
-        out_lod = [19, 11]
+        out = np.concatenate((x1[0:self.lod1[0]], x2[0:self.lod2[0]],
+                              x1[self.lod1[0]:], x2[self.lod2[0]:]))
 
         self.op_type = "sequence_concat"
-        self.inputs = {'X': [("x1", (x1, [lod1])), ("x2", (x2, [lod2]))]}
-        self.outputs = {"Out": (out, [out_lod])}
+        self.inputs = {
+            'X': [("x1", (x1, [self.lod1])), ("x2", (x2, [self.lod2]))]
+        }
+        self.outputs = {"Out": (out, [self.out_lod])}
 
     def test_output(self):
         self.check_output(1e-3)
@@ -41,5 +46,33 @@ class TestSequenceConcat(OpTest):
         self.check_grad(inputs_to_check=['x1', 'x2'], output_names="Out")
 
 
+class TestSequenceConcatCase2(TestSequenceConcat):
+    def setLoD(self):
+        self.lod1 = [10, 0]
+        self.lod2 = [12, 8]
+        self.out_lod = [22, 8]
+
+
+class TestSequenceConcatCase3(TestSequenceConcat):
+    def setLoD(self):
+        self.lod1 = [10, 0]
+        self.lod2 = [20, 0]
+        self.out_lod = [30, 0]
+
+
+class TestSequenceConcatCase4(TestSequenceConcat):
+    def setLoD(self):
+        self.lod1 = [0, 10]
+        self.lod2 = [0, 20]
+        self.out_lod = [0, 30]
+
+
+class TestSequenceConcatCase5(TestSequenceConcat):
+    def setLoD(self):
+        self.lod1 = [0, 10]
+        self.lod2 = [20, 0]
+        self.out_lod = [20, 10]
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -101,5 +101,16 @@ class TestSequenceEnumerateOpLargePadValue(TestSequenceEnumerateOp):
         self.out_seq = np.array(out_seq).astype("int32")
 
 
+class TestSequenceEnumerateOpLargePadValueSeqLen0(TestSequenceEnumerateOp):
+    def init_test_case(self):
+        self.in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
+        self.lod = [[0, 14, 0, 16, 0]]
+        self.win_size = 5
+        self.pad_value = 5
+        out_seq = sequence_enumerate(self.in_seq, self.lod, self.win_size,
+                                     self.pad_value)
+        self.out_seq = np.array(out_seq).astype("int32")
+
+
 if __name__ == "__main__":
     unittest.main()
@@ -79,6 +79,21 @@ class TestSequenceEraseOpInt64(OpTest):
         self.check_output()
 
 
+class TestSequenceEraseOpInt64SeqLen0(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_erase"
+        in_seq = np.random.randint(0, 10, (30, 1)).astype("int64")
+        lod = [[0, 9, 0, 0, 10, 11, 0]]
+        tokens = [2, 3, 5]
+        out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
+        self.attrs = {'tokens': tokens}
+        self.inputs = {'X': (in_seq, lod)}
+        self.outputs = {'Out': (out_seq, [new_lod0])}
+
+    def test_check_output(self):
+        self.check_output()
+
+
 class TestSequenceEraseOpEmpty(OpTest):
     def setUp(self):
         self.op_type = "sequence_erase"
...
@@ -116,5 +116,23 @@ class TestSequenceExpandCase4(TestSequenceExpand):
         self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
 
 
+class TestSequenceExpandCase5(TestSequenceExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [6, 1]).astype('float32')
+        y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
+        y_lod = [[2, 4], [2, 2, 3, 0, 3, 3]]
+        self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
+        self.attrs = {'ref_level': 1}
+
+
+class TestSequenceExpandCase6(TestSequenceExpand):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
+        x_lod = [[1, 1, 0, 1, 1]]
+        y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
+        y_lod = [[0, 2, 4, 2, 0]]
+        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
+
+
 if __name__ == '__main__':
     unittest.main()
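In sequence_expand, the i-th sequence of X is repeated as many times as the length of the i-th ref-level segment of Y; a zero repeat count or an empty X sequence contributes nothing. A rough NumPy model of the forward semantics (assuming the ref lengths line up one-to-one with X's sequences, as in Case6 above):

import numpy as np

def sequence_expand(x, x_lengths, ref_lengths):
    """Repeat the i-th sequence of x ref_lengths[i] times."""
    offsets = np.cumsum([0] + list(x_lengths))
    pieces = []
    for i, rep in enumerate(ref_lengths):
        seq = x[offsets[i]:offsets[i + 1]]
        for _ in range(rep):  # rep == 0 or an empty seq adds nothing
            if len(seq):
                pieces.append(seq)
    return np.concatenate(pieces) if pieces else x[:0]

x = np.array([[1.], [2.], [3.], [4.]])
out = sequence_expand(x, [1, 1, 0, 1, 1], [0, 2, 4, 2, 0])
assert out.ravel().tolist() == [2., 2., 3., 3.]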
@@ -65,6 +65,15 @@ class TestSequenceExpandAsCase1(TestSequenceExpandAs):
 
 
 class TestSequenceExpandAsCase2(TestSequenceExpandAs):
+    def set_data(self):
+        x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
+        x_lod = [[2, 3]]
+        y_data = np.random.uniform(0.1, 1, [10, 1]).astype('float32')
+        y_lod = [[0, 4, 0, 6, 0]]
+        self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
+
+
+class TestSequenceExpandAsCase3(TestSequenceExpandAs):
     def set_data(self):
         x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
         x_lod = [[1]]
...
@@ -132,5 +132,14 @@ class TestSequencePadOp7(TestSequencePadOp):
         self.dtype = 'float32'
 
 
+class TestSequencePadOp8(TestSequencePadOp):
+    def set_attr(self):
+        self.x_shape = [12, 2, 2]
+        self.x_len_lod = [[0, 8, 0, 4, 0]]
+        self.pad_value = [1.0]
+        self.padded_length = 10
+        self.dtype = 'float32'
+
+
 if __name__ == '__main__':
     unittest.main()
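sequence_pad lifts a packed LoD tensor into a dense batch, filling each row out to padded_length with pad_value; a zero-length sequence becomes an all-pad row, as in TestSequencePadOp8 above. A NumPy sketch (illustrative; 1-D features for brevity):

import numpy as np

def sequence_pad(x, lengths, pad_value, padded_length):
    """Pad every sequence to padded_length; also return the lengths."""
    offsets = np.cumsum([0] + list(lengths))
    out = np.full((len(lengths), padded_length) + x.shape[1:], pad_value,
                  dtype=x.dtype)
    for i, n in enumerate(lengths):  # n == 0 leaves an all-pad row
        out[i, :n] = x[offsets[i]:offsets[i] + n]
    return out, np.array(lengths)

x = np.arange(12, dtype='float32').reshape(12, 1)
out, lens = sequence_pad(x, [0, 8, 0, 4, 0], 1.0, 10)
assert out.shape == (5, 10, 1) and (out[0] == 1.0).all()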
@@ -21,17 +21,17 @@ from op_test import OpTest
 
 class TestSequenceReshape(OpTest):
+    def init_data(self):
+        self.dimension = 12
+        self.x_lod = [[4, 1, 3, 3]]
+        self.x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
+
     def setUp(self):
+        self.init_data()
         self.op_type = 'sequence_reshape'
-        dimension = 12
-        x_lod = [[4, 1, 3, 3]]
-        x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
-
-        self.inputs = {'X': (x, x_lod)}
-        self.attrs = {'new_dim': dimension}
-        out, out_lod = self.compute_output(x, x_lod, dimension)
+        self.inputs = {'X': (self.x, self.x_lod)}
+        self.attrs = {'new_dim': self.dimension}
+        out, out_lod = self.compute_output(self.x, self.x_lod, self.dimension)
         self.outputs = {'Out': (out, out_lod)}
 
     def compute_output(self, x, x_lod, dimension):
@@ -54,33 +54,31 @@
 
 
 class TestSequenceReshape_reduce(TestSequenceReshape):
-    def setUp(self):
-        self.op_type = 'sequence_reshape'
-        dimension = 24
-        x_lod = [[4, 2, 2, 4]]
-        x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
-
-        self.inputs = {'X': (x, x_lod)}
-        self.attrs = {'new_dim': dimension}
-        out, out_lod = self.compute_output(x, x_lod, dimension)
-        self.outputs = {'Out': (out, out_lod)}
+    def init_data(self):
+        self.dimension = 24
+        self.x_lod = [[4, 2, 2, 4]]
+        self.x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
 
 
 class TestSequenceReshape_same(TestSequenceReshape):
-    def setUp(self):
-        self.op_type = 'sequence_reshape'
-        dimension = 12
-        x_lod = [[4, 2, 2, 4]]
-        x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
-
-        self.inputs = {'X': (x, x_lod)}
-        self.attrs = {'new_dim': dimension}
-        out, out_lod = self.compute_output(x, x_lod, dimension)
-        self.outputs = {'Out': (out, out_lod)}
+    def init_data(self):
+        self.dimension = 12
+        self.x_lod = [[4, 2, 2, 4]]
+        self.x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
+
+
+class TestSequenceReshape_reduce_seq_len0(TestSequenceReshape):
+    def init_data(self):
+        self.dimension = 24
+        self.x_lod = [[0, 6, 0, 2, 4]]
+        self.x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
+
+
+class TestSequenceReshape_reduce_seq_len0_case1(TestSequenceReshape):
+    def init_data(self):
+        self.dimension = 24
+        self.x_lod = [[0, 2, 8, 2, 0]]
+        self.x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
 
 
 if __name__ == '__main__':
...
@@ -65,5 +65,17 @@ class TestSequenceReverse2(TestSequenceReverseBase):
         self.lod = [12]
 
 
+class TestSequenceReverse3(TestSequenceReverseBase):
+    def initParameters(self):
+        self.size = (12, 10)
+        self.lod = [3, 0, 6, 3]
+
+
+class TestSequenceReverse4(TestSequenceReverseBase):
+    def initParameters(self):
+        self.size = (12, 10)
+        self.lod = [0, 2, 10, 0]
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -18,20 +18,26 @@ from op_test import OpTest
 
 
 class TestSequenceScatterOp(OpTest):
+    def init_lod(self):
+        return [[3, 5, 4]]
+
     def setUp(self):
         self.op_type = "sequence_scatter"
         X_data = np.random.uniform(0.1, 1.0, [3, 6]).astype('float32')
-        Ids_data = np.array([[0], [1], [2], [5], [4], [3], [2], [1], [3], [2],
+        Ids_data = np.array([[0], [1], [2], [5], [4], [3], [0], [1], [3], [2],
                              [5], [4]]).astype('int64')
-        Ids_lod = [[3, 5, 4]]
+        Ids_lod = self.init_lod()
         Updates_data = np.random.uniform(0.1, 1.0, [12, 1]).astype('float32')
         Updates_lod = Ids_lod
 
         Out_data = np.copy(X_data)
-        Out_data[0][Ids_data[0:3]] += Updates_data[0:3]
-        Out_data[1][Ids_data[3:8]] += Updates_data[3:8]
-        Out_data[2][Ids_data[8:]] += Updates_data[8:]
+        offset = 0
+        for i in range(3):
+            Out_data[i][Ids_data[offset:(offset + Ids_lod[0][
+                i])]] += Updates_data[offset:(offset + Ids_lod[0][i])]
+            offset += Ids_lod[0][i]
 
         self.inputs = {
             'X': X_data,
@@ -47,5 +53,20 @@ class TestSequenceScatterOp(OpTest):
         self.check_grad(['Updates'], 'Out', in_place=True)
 
 
+class TestSequenceScatterOpSeqLen0(TestSequenceScatterOp):
+    def init_lod(self):
+        return [[6, 0, 6]]
+
+
+class TestSequenceScatterOpSeqLen0Case1(TestSequenceScatterOp):
+    def init_lod(self):
+        return [[0, 6, 6]]
+
+
+class TestSequenceScatterOpSeqLen0Case2(TestSequenceScatterOp):
+    def init_lod(self):
+        return [[6, 6, 0]]
+
+
 if __name__ == "__main__":
     unittest.main()
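sequence_scatter adds the i-th update sequence into row i of X at the column indices given by the i-th Ids sequence, so an empty sequence leaves its row untouched. The reference loop added above, as a standalone sketch (hypothetical helper name):

import numpy as np

def sequence_scatter(x, ids, updates, lengths):
    """Scatter-add each update sequence into the matching row of x."""
    out = np.copy(x)
    offset = 0
    for i, n in enumerate(lengths):  # n == 0: row i is left unchanged
        cols = ids[offset:offset + n]
        out[i][cols] += updates[offset:offset + n]
        offset += n
    return out

x = np.zeros((3, 6), dtype='float32')
ids = np.array([0, 1, 2, 5, 4, 3, 0, 1, 3, 2, 5, 4])
upd = np.ones(12, dtype='float32')
print(sequence_scatter(x, ids, upd, [6, 0, 6]))  # row 1 stays all-zero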
@@ -59,5 +59,29 @@ class TestSequenceSliceOp(OpTest):
         self.check_grad(['X'], 'Out')
 
 
+class TestSequenceSliceOpSeqlen0Case0(TestSequenceSliceOp):
+    def init_test_case(self):
+        self.x_dim = (100, 3, 2)
+        self.x_lod = [[20, 30, 0, 30, 20]]
+        self.offset = [[1], [2], [0], [4], [5]]
+        self.length = [[10], [8], [0], [4], [2]]
+
+
+class TestSequenceSliceOpSeqlen0Case1(TestSequenceSliceOp):
+    def init_test_case(self):
+        self.x_dim = (100, 3, 2)
+        self.x_lod = [[0, 70, 0, 30, 0]]
+        self.offset = [[0], [2], [0], [4], [0]]
+        self.length = [[0], [8], [0], [4], [0]]
+
+
+class TestSequenceSliceOpSeqlen0Case2(TestSequenceSliceOp):
+    def init_test_case(self):
+        self.x_dim = (100, 3, 2)
+        self.x_lod = [[0, 100, 0, 0, 0]]
+        self.offset = [[0], [2], [0], [0], [0]]
+        self.length = [[0], [8], [0], [0], [0]]
+
+
 if __name__ == '__main__':
     unittest.main()
@@ -28,21 +28,26 @@ class TestSequenceSoftmaxOp(OpTest):
         self.init_op_type()
 
         x = np.random.uniform(0.1, 1, (11, 1)).astype("float32")
-        lod = [[4, 1, 3, 3]]
+        self.init_lod()
 
         out = np.zeros((11, 1)).astype("float32")
         offset = 0
-        for i in range(len(lod[0])):
-            sub_x = x[offset:offset + lod[0][i], :]
-            sub_x = sub_x.reshape(1, lod[0][i])
+        for i in range(len(self.lod[0])):
+            if (self.lod[0][i] == 0):
+                continue
+            sub_x = x[offset:offset + self.lod[0][i], :]
+            sub_x = sub_x.reshape(1, self.lod[0][i])
             sub_out = stable_softmax(sub_x)
-            out[offset:offset + lod[0][i], :] = sub_out.reshape(lod[0][i], 1)
-            offset += lod[0][i]
+            out[offset:offset + self.lod[0][i], :] = sub_out.reshape(
+                self.lod[0][i], 1)
+            offset += self.lod[0][i]
 
-        self.inputs = {"X": (x, lod)}
+        self.inputs = {"X": (x, self.lod)}
         self.outputs = {"Out": out}
         self.attrs = {'use_cudnn': self.use_cudnn, }
 
+    def init_lod(self):
+        self.lod = [[4, 1, 3, 3]]
+
     def init_op_type(self):
         pass
@@ -70,5 +75,20 @@ class TestSequenceSoftmaxCUDNNOp(TestSequenceSoftmaxOp):
         self.use_cudnn = True
 
 
+class TestSequenceSoftmaxOpSeqLen0Case0(TestSequenceSoftmaxOp):
+    def init_lod(self):
+        self.lod = [[4, 0, 4, 3]]
+
+
+class TestSequenceSoftmaxOpSeqLen0Case1(TestSequenceSoftmaxOp):
+    def init_lod(self):
+        self.lod = [[0, 4, 7, 0]]
+
+
+class TestSequenceSoftmaxOpSeqLen0Case2(TestSequenceSoftmaxOp):
+    def init_lod(self):
+        self.lod = [[0, 0, 0, 11]]
+
+
 if __name__ == "__main__":
     unittest.main()
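The reference computation above normalizes each sequence independently and now skips empty ones. The same computation as a standalone sketch (stable_softmax reimplemented here so the snippet is self-contained):

import numpy as np

def stable_softmax(v):
    exps = np.exp(v - np.max(v))  # shift by the max for numerical stability
    return exps / np.sum(exps)

def sequence_softmax(x, lengths):
    """Softmax within each sequence; zero-length sequences are skipped."""
    out = np.zeros_like(x)
    offset = 0
    for n in lengths:
        if n == 0:
            continue
        out[offset:offset + n, 0] = stable_softmax(x[offset:offset + n, 0])
        offset += n
    return out

x = np.random.uniform(0.1, 1, (11, 1)).astype("float32")
out = sequence_softmax(x, [0, 4, 7, 0])
assert np.allclose(out[0:4, 0].sum(), 1.0) and np.allclose(out[4:, 0].sum(), 1.0)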
@@ -71,5 +71,19 @@ class TestSequenceUnpadOp3(TestSequenceUnpadOp):
         self.dtype = "float64"
 
 
+class TestSequenceUnpadOp4(TestSequenceUnpadOp):
+    def init(self):
+        self.length = [5, 0, 0, 4]
+        self.x_shape = (4, 5, 3, 3, 6)
+        self.dtype = "float64"
+
+
+class TestSequenceUnpadOp5(TestSequenceUnpadOp):
+    def init(self):
+        self.length = [0, 4, 3, 0]
+        self.x_shape = (4, 5, 3, 3, 6)
+        self.dtype = "float64"
+
+
 if __name__ == '__main__':
     unittest.main()