未验证 提交 6ca44cba 编写于 作者: J jinyuKING 提交者: GitHub

API/OP(sequence_first_step, sequence_last_step, sequence_mask, beam_search,...

API/OP(sequence_first_step, sequence_last_step, sequence_mask, beam_search, beam_search_decode) error message enhancement (#24590)
上级 0c85fd0a
...@@ -128,14 +128,31 @@ class BeamSearchDecodeOp : public framework::OperatorBase { ...@@ -128,14 +128,31 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids"); const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores"); const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size(); const size_t step_num = ids->size();
PADDLE_ENFORCE_GT(step_num, 0UL, PADDLE_ENFORCE_GT(
"beam search steps should be larger than 0"); step_num, 0UL,
platform::errors::InvalidArgument(
"beam search steps, which is the"
"size of Input(Ids) LoDTensorArray. beam search steps should "
"be larger than 0, but received %d. ",
step_num));
const size_t source_num = ids->at(0).lod().at(0).size() - 1; const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0"); PADDLE_ENFORCE_GT(
source_num, 0UL,
platform::errors::InvalidArgument(
"source_num is the sequence number of the"
"first decoding step, indicating by Input(Ids)[0].lod[0].size. "
"The number of source_num should be larger than"
"0, but received %d. ",
source_num));
for (size_t i = 0; i < step_num; ++i) { for (size_t i = 0; i < step_num; ++i) {
PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL, PADDLE_ENFORCE_EQ(
"Level of LodTensor should be 2"); ids->at(i).lod().size(), 2UL,
platform::errors::InvalidArgument(
"For the i step in beam search steps,"
"the size of Input(Ids)[i].lod() should larger than 2,"
"but received %d. ",
ids->at(i).lod().size()));
} }
size_t beam_size = ctx.Attr<int>("beam_size"); size_t beam_size = ctx.Attr<int>("beam_size");
...@@ -190,14 +207,14 @@ hypothesis has. ...@@ -190,14 +207,14 @@ hypothesis has.
class BeamSearchDecodeInferShape : public framework::InferShapeBase { class BeamSearchDecodeInferShape : public framework::InferShapeBase {
public: public:
void operator()(framework::InferShapeContext* context) const override { void operator()(framework::InferShapeContext* context) const override {
PADDLE_ENFORCE(context->HasInput("Ids"), OP_INOUT_CHECK(context->HasInput("Ids"), "Input", "Ids",
"BeamSearchDecodeOp must have input Ids"); "BeamSearchDecode");
PADDLE_ENFORCE(context->HasInput("Scores"), OP_INOUT_CHECK(context->HasInput("Scores"), "Input", "Scores",
"BeamSearchDecodeOp must have input Scores"); "BeamSearchDecode");
PADDLE_ENFORCE(context->HasOutput("SentenceIds"), OP_INOUT_CHECK(context->HasOutput("SentenceIds"), "Output", "SentenceIds",
"BeamSearchDecodeOp must have output SentenceIds"); "BeamSearchDecode");
PADDLE_ENFORCE(context->HasOutput("SentenceScores"), OP_INOUT_CHECK(context->HasOutput("SentenceScores"), "Output",
"BeamSearchDecodeOp must have output SentenceScores"); "SentenceScores", "BeamSearchDecode");
} }
}; };
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/lod_tensor_array.h"
...@@ -82,7 +83,15 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor( ...@@ -82,7 +83,15 @@ void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
LoDTensor* score_tensor, bool reverse, bool sort_by_score) const { LoDTensor* score_tensor, bool reverse, bool sort_by_score) const {
size_t src_num = sentence_vector_list.size(); size_t src_num = sentence_vector_list.size();
PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0"); PADDLE_ENFORCE_NE(
src_num, 0,
platform::errors::InvalidArgument(
"src_num is the sequence number of the first decoding step"
", indicating by Input(Ids)[0].lod[0].size."
"src_num has wrong value."
"src_num should not be 0,"
"But received %d.",
src_num));
std::vector<size_t> source_level_lod = {0}; std::vector<size_t> source_level_lod = {0};
std::vector<size_t> sentence_level_lod = {0}; std::vector<size_t> sentence_level_lod = {0};
...@@ -144,9 +153,16 @@ void BeamSearchDecoder<T>::Backtrace(const LoDTensorArray& step_ids, ...@@ -144,9 +153,16 @@ void BeamSearchDecoder<T>::Backtrace(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, const LoDTensorArray& step_scores,
LoDTensor* id_tensor, LoDTensor* id_tensor,
LoDTensor* score_tensor) const { LoDTensor* score_tensor) const {
PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0"); PADDLE_ENFORCE_NE(
PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(), step_ids.empty(), true,
"step_ids and step_scores should be the same"); platform::errors::InvalidArgument("Input(Ids) should not be empty."
"But the Input(Ids) is empty."));
PADDLE_ENFORCE_EQ(
step_ids.size(), step_scores.size(),
platform::errors::InvalidArgument(
"The size of Input(Ids) and Input(Scores) should be "
"the same. But the size of Input(Ids) and Input(Scores) "
"are not equal."));
const size_t step_num = step_ids.size(); const size_t step_num = step_ids.size();
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1; const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;
std::vector<SentenceVector<T>> sentence_vector_list( std::vector<SentenceVector<T>> sentence_vector_list(
......
...@@ -35,10 +35,17 @@ void GenerateExample(const std::vector<size_t>& level_0, ...@@ -35,10 +35,17 @@ void GenerateExample(const std::vector<size_t>& level_0,
const std::vector<int>& data, LoDTensorArray* ids, const std::vector<int>& data, LoDTensorArray* ids,
LoDTensorArray* scores) { LoDTensorArray* scores) {
PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1, PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1,
"source level is used to describe candidate set"); platform::errors::InvalidArgument(
"source level is used to describe candidate set"
", so it's element should less than levle_1 length. "
"And the value of source"
"level is %d. ",
level_1.size() - 1));
PADDLE_ENFORCE_EQ(level_1.back(), data.size(), PADDLE_ENFORCE_EQ(level_1.back(), data.size(),
"the lowest level is used to describe data" platform::errors::InvalidArgument(
", so it's last element should be data length"); "the lowest level is used to describe data"
", so it's last element should be data length %d. ",
data.size()));
CPUPlace place; CPUPlace place;
......
...@@ -89,13 +89,11 @@ class BeamSearchOp : public framework::OperatorWithKernel { ...@@ -89,13 +89,11 @@ class BeamSearchOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
for (const std::string &arg : for (const std::string &arg :
std::vector<std::string>({"pre_ids", "scores"})) { std::vector<std::string>({"pre_ids", "scores"})) {
PADDLE_ENFORCE(ctx->HasInput(arg), "BeamSearch need input argument '%s'", OP_INOUT_CHECK(ctx->HasInput(arg), "Input", arg, "BeamSeach");
arg);
} }
for (const std::string &arg : for (const std::string &arg :
std::vector<std::string>({"selected_ids", "selected_scores"})) { std::vector<std::string>({"selected_ids", "selected_scores"})) {
PADDLE_ENFORCE(ctx->HasOutput(arg), OP_INOUT_CHECK(ctx->HasOutput(arg), "Output", arg, "BeamSeach");
"BeamSearch need output argument '%s'", arg);
} }
} }
......
...@@ -29,9 +29,15 @@ class BeamSearchOpKernel : public framework::OpKernel<T> { ...@@ -29,9 +29,15 @@ class BeamSearchOpKernel : public framework::OpKernel<T> {
auto* pre_ids = context.Input<framework::LoDTensor>("pre_ids"); auto* pre_ids = context.Input<framework::LoDTensor>("pre_ids");
auto* pre_scores = context.Input<framework::LoDTensor>("pre_scores"); auto* pre_scores = context.Input<framework::LoDTensor>("pre_scores");
PADDLE_ENFORCE_NOT_NULL(scores); PADDLE_ENFORCE_NOT_NULL(scores,
PADDLE_ENFORCE_NOT_NULL(pre_ids); platform::errors::NotFound(
PADDLE_ENFORCE_NOT_NULL(pre_scores); "Input(scores) of BeamSearchOp is not found."));
PADDLE_ENFORCE_NOT_NULL(
pre_ids, platform::errors::NotFound(
"Input(pre_ids) of BeamSearchOp is not found."));
PADDLE_ENFORCE_NOT_NULL(
pre_scores, platform::errors::NotFound(
"Input(pre_scores) of BeamSearchOp is not found."));
size_t level = context.Attr<int>("level"); size_t level = context.Attr<int>("level");
size_t beam_size = context.Attr<int>("beam_size"); size_t beam_size = context.Attr<int>("beam_size");
...@@ -42,8 +48,14 @@ class BeamSearchOpKernel : public framework::OpKernel<T> { ...@@ -42,8 +48,14 @@ class BeamSearchOpKernel : public framework::OpKernel<T> {
auto selected_scores = auto selected_scores =
context.Output<framework::LoDTensor>("selected_scores"); context.Output<framework::LoDTensor>("selected_scores");
auto* parent_idx = context.Output<framework::Tensor>("parent_idx"); auto* parent_idx = context.Output<framework::Tensor>("parent_idx");
PADDLE_ENFORCE_NOT_NULL(selected_ids); PADDLE_ENFORCE_NOT_NULL(
PADDLE_ENFORCE_NOT_NULL(selected_scores); selected_ids,
platform::errors::NotFound(
"Output(selected_scores) of BeamSearchOp is not found."));
PADDLE_ENFORCE_NOT_NULL(
selected_scores,
platform::errors::NotFound(
"Output(parent_idx) of BeamSearchOp is not found."));
math::BeamSearchFunctor<DeviceContext, T> alg; math::BeamSearchFunctor<DeviceContext, T> alg;
alg(context.template device_context<DeviceContext>(), pre_ids, pre_scores, alg(context.template device_context<DeviceContext>(), pre_ids, pre_scores,
......
...@@ -23,8 +23,8 @@ class SequenceMaskOp : public framework::OperatorWithKernel { ...@@ -23,8 +23,8 @@ class SequenceMaskOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must exist"); OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceMask");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) must exist"); OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "SequenceMask");
int maxlen = ctx->Attrs().Get<int>("maxlen"); int maxlen = ctx->Attrs().Get<int>("maxlen");
auto dim = framework::vectorize<int>(ctx->GetInputDim("X")); auto dim = framework::vectorize<int>(ctx->GetInputDim("X"));
......
...@@ -80,7 +80,10 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> { ...@@ -80,7 +80,10 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
int maxlen = ctx.Attr<int>("maxlen"); int maxlen = ctx.Attr<int>("maxlen");
if (ctx.HasInput("MaxLenTensor")) { if (ctx.HasInput("MaxLenTensor")) {
auto max_len_tensor = ctx.Input<Tensor>("MaxLenTensor"); auto max_len_tensor = ctx.Input<Tensor>("MaxLenTensor");
PADDLE_ENFORCE(max_len_tensor != NULL, "MaxLenTensor is NULL"); PADDLE_ENFORCE_NOT_NULL(max_len_tensor,
platform::errors::InvalidArgument(
"Input(MaxLenTensor) should not be NULL."
"But received Input(MaxLenTensor) is NULL"));
if (platform::is_gpu_place(max_len_tensor->place())) { if (platform::is_gpu_place(max_len_tensor->place())) {
framework::Tensor temp; framework::Tensor temp;
TensorCopySync(*max_len_tensor, platform::CPUPlace(), &temp); TensorCopySync(*max_len_tensor, platform::CPUPlace(), &temp);
...@@ -93,8 +96,12 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> { ...@@ -93,8 +96,12 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
y_dim.push_back(maxlen); y_dim.push_back(maxlen);
y->Resize(framework::make_ddim(y_dim)); y->Resize(framework::make_ddim(y_dim));
PADDLE_ENFORCE_GT(maxlen, 0, PADDLE_ENFORCE_GT(
"MaxLenTensor value should be greater than 0"); maxlen, 0,
platform::errors::InvalidArgument(
"Input(MaxLenTensor) value should be greater than 0. But "
"received Input(MaxLenTensor) value = %d.",
maxlen));
} }
auto *x_data = x->data<Tx>(); auto *x_data = x->data<Tx>();
......
...@@ -57,7 +57,7 @@ __all__ = [ ...@@ -57,7 +57,7 @@ __all__ = [
class RNNCell(object): class RNNCell(object):
""" """
:api_attr: Static Graph :api_attr: Static Graph
RNNCell is the base class for abstraction representing the calculations RNNCell is the base class for abstraction representing the calculations
mapping the input and state to the output and new state. It is suitable to mapping the input and state to the output and new state. It is suitable to
...@@ -223,7 +223,7 @@ class RNNCell(object): ...@@ -223,7 +223,7 @@ class RNNCell(object):
class GRUCell(RNNCell): class GRUCell(RNNCell):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Gated Recurrent Unit cell. It is a wrapper for Gated Recurrent Unit cell. It is a wrapper for
`fluid.contrib.layers.rnn_impl.BasicGRUUnit` to make it adapt to RNNCell. `fluid.contrib.layers.rnn_impl.BasicGRUUnit` to make it adapt to RNNCell.
...@@ -321,7 +321,7 @@ class GRUCell(RNNCell): ...@@ -321,7 +321,7 @@ class GRUCell(RNNCell):
class LSTMCell(RNNCell): class LSTMCell(RNNCell):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Long-Short Term Memory cell. It is a wrapper for Long-Short Term Memory cell. It is a wrapper for
`fluid.contrib.layers.rnn_impl.BasicLSTMUnit` to make it adapt to RNNCell. `fluid.contrib.layers.rnn_impl.BasicLSTMUnit` to make it adapt to RNNCell.
...@@ -437,7 +437,7 @@ def rnn(cell, ...@@ -437,7 +437,7 @@ def rnn(cell,
is_reverse=False, is_reverse=False,
**kwargs): **kwargs):
""" """
:api_attr: Static Graph :api_attr: Static Graph
rnn creates a recurrent neural network specified by RNNCell `cell`, rnn creates a recurrent neural network specified by RNNCell `cell`,
which performs :code:`cell.call()` repeatedly until reaches to the maximum which performs :code:`cell.call()` repeatedly until reaches to the maximum
...@@ -583,7 +583,7 @@ def rnn(cell, ...@@ -583,7 +583,7 @@ def rnn(cell,
class Decoder(object): class Decoder(object):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Decoder is the base class for any decoder instance used in `dynamic_decode`. Decoder is the base class for any decoder instance used in `dynamic_decode`.
It provides interface for output generation for one time step, which can be It provides interface for output generation for one time step, which can be
...@@ -696,7 +696,7 @@ class Decoder(object): ...@@ -696,7 +696,7 @@ class Decoder(object):
class BeamSearchDecoder(Decoder): class BeamSearchDecoder(Decoder):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Decoder with beam search decoding strategy. It wraps a cell to get probabilities, Decoder with beam search decoding strategy. It wraps a cell to get probabilities,
and follows a beam search step to calculate scores and select candidate and follows a beam search step to calculate scores and select candidate
...@@ -1174,7 +1174,7 @@ def dynamic_decode(decoder, ...@@ -1174,7 +1174,7 @@ def dynamic_decode(decoder,
return_length=False, return_length=False,
**kwargs): **kwargs):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
Tensor indicating finished status contains all True values or the number of Tensor indicating finished status contains all True values or the number of
...@@ -1998,7 +1998,7 @@ def dynamic_lstm(input, ...@@ -1998,7 +1998,7 @@ def dynamic_lstm(input,
dtype='float32', dtype='float32',
name=None): name=None):
""" """
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
1. This OP only supports LoDTensor as inputs. If you need to deal with Tensor, please use :ref:`api_fluid_layers_lstm` . 1. This OP only supports LoDTensor as inputs. If you need to deal with Tensor, please use :ref:`api_fluid_layers_lstm` .
...@@ -2170,7 +2170,7 @@ def lstm(input, ...@@ -2170,7 +2170,7 @@ def lstm(input,
default_initializer=None, default_initializer=None,
seed=-1): seed=-1):
""" """
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
This OP only supports running on GPU devices. This OP only supports running on GPU devices.
...@@ -2357,7 +2357,7 @@ def dynamic_lstmp(input, ...@@ -2357,7 +2357,7 @@ def dynamic_lstmp(input,
cell_clip=None, cell_clip=None,
proj_clip=None): proj_clip=None):
""" """
:api_attr: Static Graph :api_attr: Static Graph
**Note**: **Note**:
1. In order to improve efficiency, users must first map the input of dimension [T, hidden_size] to input of [T, 4 * hidden_size], and then pass it to this OP. 1. In order to improve efficiency, users must first map the input of dimension [T, hidden_size] to input of [T, 4 * hidden_size], and then pass it to this OP.
...@@ -2568,7 +2568,7 @@ def dynamic_gru(input, ...@@ -2568,7 +2568,7 @@ def dynamic_gru(input,
h_0=None, h_0=None,
origin_mode=False): origin_mode=False):
""" """
:api_attr: Static Graph :api_attr: Static Graph
**Note: The input type of this must be LoDTensor. If the input type to be **Note: The input type of this must be LoDTensor. If the input type to be
processed is Tensor, use** :ref:`api_fluid_layers_StaticRNN` . processed is Tensor, use** :ref:`api_fluid_layers_StaticRNN` .
...@@ -2730,7 +2730,7 @@ def gru_unit(input, ...@@ -2730,7 +2730,7 @@ def gru_unit(input,
gate_activation='sigmoid', gate_activation='sigmoid',
origin_mode=False): origin_mode=False):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for Gated Recurrent Unit (GRU) RNN cell. This operator performs GRU calculations for
one time step and it supports these two modes: one time step and it supports these two modes:
...@@ -2888,9 +2888,9 @@ def beam_search(pre_ids, ...@@ -2888,9 +2888,9 @@ def beam_search(pre_ids,
name=None, name=None,
return_parent_idx=False): return_parent_idx=False):
""" """
:alias_main: paddle.nn.beam_search :alias_main: paddle.nn.beam_search
:alias: paddle.nn.beam_search,paddle.nn.decode.beam_search :alias: paddle.nn.beam_search,paddle.nn.decode.beam_search
:old_api: paddle.fluid.layers.beam_search :old_api: paddle.fluid.layers.beam_search
Beam search is a classical algorithm for selecting candidate words in a Beam search is a classical algorithm for selecting candidate words in a
machine translation task. machine translation task.
...@@ -2993,6 +2993,12 @@ def beam_search(pre_ids, ...@@ -2993,6 +2993,12 @@ def beam_search(pre_ids,
beam_size=beam_size, beam_size=beam_size,
end_id=end_id) end_id=end_id)
""" """
check_variable_and_dtype(pre_ids, 'pre_ids', ['int64'], 'beam_search')
check_variable_and_dtype(pre_scores, 'pre_scores', ['float32', 'float64'],
'beam_search')
check_type(ids, 'ids', (Variable, type(None)), 'beam_search')
check_variable_and_dtype(scores, 'scores', ['float32', 'float64'],
'beam_search')
helper = LayerHelper('beam_search', **locals()) helper = LayerHelper('beam_search', **locals())
score_type = pre_scores.dtype score_type = pre_scores.dtype
id_type = pre_ids.dtype id_type = pre_ids.dtype
...@@ -3033,9 +3039,9 @@ def beam_search(pre_ids, ...@@ -3033,9 +3039,9 @@ def beam_search(pre_ids,
def beam_search_decode(ids, scores, beam_size, end_id, name=None): def beam_search_decode(ids, scores, beam_size, end_id, name=None):
""" """
:alias_main: paddle.nn.beam_search_decode :alias_main: paddle.nn.beam_search_decode
:alias: paddle.nn.beam_search_decode,paddle.nn.decode.beam_search_decode :alias: paddle.nn.beam_search_decode,paddle.nn.decode.beam_search_decode
:old_api: paddle.fluid.layers.beam_search_decode :old_api: paddle.fluid.layers.beam_search_decode
This operator is used after beam search has completed. It constructs the This operator is used after beam search has completed. It constructs the
full predicted sequences for each sample by walking back along the search full predicted sequences for each sample by walking back along the search
...@@ -3090,6 +3096,9 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None): ...@@ -3090,6 +3096,9 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
finished_ids, finished_scores = fluid.layers.beam_search_decode( finished_ids, finished_scores = fluid.layers.beam_search_decode(
ids, scores, beam_size=5, end_id=0) ids, scores, beam_size=5, end_id=0)
""" """
check_variable_and_dtype(ids, 'ids', ['int64'], 'beam_search_encode')
check_variable_and_dtype(scores, 'scores', ['float32'],
'beam_search_encode')
helper = LayerHelper('beam_search_decode', **locals()) helper = LayerHelper('beam_search_decode', **locals())
sentence_ids = helper.create_variable_for_type_inference(dtype=ids.dtype) sentence_ids = helper.create_variable_for_type_inference(dtype=ids.dtype)
sentence_scores = helper.create_variable_for_type_inference(dtype=ids.dtype) sentence_scores = helper.create_variable_for_type_inference(dtype=ids.dtype)
...@@ -3116,7 +3125,7 @@ def lstm_unit(x_t, ...@@ -3116,7 +3125,7 @@ def lstm_unit(x_t,
bias_attr=None, bias_attr=None,
name=None): name=None):
""" """
:api_attr: Static Graph :api_attr: Static Graph
Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for Long-Short Term Memory (LSTM) RNN cell. This operator performs LSTM calculations for
one time step, whose implementation is based on calculations described in `RECURRENT one time step, whose implementation is based on calculations described in `RECURRENT
......
...@@ -320,7 +320,7 @@ def sequence_pool(input, pool_type, is_test=False, pad_value=0.0): ...@@ -320,7 +320,7 @@ def sequence_pool(input, pool_type, is_test=False, pad_value=0.0):
where 1.=1., 5.=3. + 2., 4.=4., 0.0=pad_value, 12.=6. + 5. + 1. where 1.=1., 5.=3. + 2., 4.=4., 0.0=pad_value, 12.=6. + 5. + 1.
Args: Args:
input (variable): LoDTensor with lod_level no more than 2. The data type should be float32. input (variable): LoDTensor with lod_level no more than 2. The data type should be float32 or float64.
pool_type (str): The pooling type that supports average, sum, sqrt, max, last or first. pool_type (str): The pooling type that supports average, sum, sqrt, max, last or first.
is_test (bool): Only works when :attr:`pool_type` is max. If set False, a temporary Tenosr maxIndex is is_test (bool): Only works when :attr:`pool_type` is max. If set False, a temporary Tenosr maxIndex is
created to record the index information corresponding to the maximum value, which is used for backward created to record the index information corresponding to the maximum value, which is used for backward
...@@ -328,7 +328,7 @@ def sequence_pool(input, pool_type, is_test=False, pad_value=0.0): ...@@ -328,7 +328,7 @@ def sequence_pool(input, pool_type, is_test=False, pad_value=0.0):
pad_value (float): Used to pad the pooling result for empty input sequence. Default: 0.0 pad_value (float): Used to pad the pooling result for empty input sequence. Default: 0.0
Returns: Returns:
Variable: LoDTensor after pooling with data type float32. Variable: LoDTensor after pooling with data type float32 or float64.
Examples: Examples:
...@@ -471,10 +471,10 @@ def sequence_first_step(input): ...@@ -471,10 +471,10 @@ def sequence_first_step(input):
where 1.=first(1.), 3.=first(3., 2.), 4.=first(4.), 0.0 = pad_value, 6.=first(6., 5., 1.) where 1.=first(1.), 3.=first(3., 2.), 4.=first(4.), 0.0 = pad_value, 6.=first(6., 5., 1.)
Args: Args:
input(Variable): LoDTensor with lod_level no more than 2. The data type should be float32. input(Variable): LoDTensor with lod_level no more than 2. The data type should be float32 or float64.
Returns: Returns:
Variable: LoDTensor consist of the sequence's first step vector. The data type is float32. Variable: LoDTensor consist of the sequence's first step vector. The data type is float32 or float64.
Examples: Examples:
...@@ -484,6 +484,8 @@ def sequence_first_step(input): ...@@ -484,6 +484,8 @@ def sequence_first_step(input):
x = fluid.data(name='x', shape=[None, 10], dtype='float32', lod_level=1) x = fluid.data(name='x', shape=[None, 10], dtype='float32', lod_level=1)
x_first_step = fluid.layers.sequence_first_step(input=x) x_first_step = fluid.layers.sequence_first_step(input=x)
""" """
check_variable_and_dtype(input, 'input', ['float32', 'float64'],
'sequence_first_step')
return sequence_pool(input=input, pool_type="first") return sequence_pool(input=input, pool_type="first")
...@@ -539,6 +541,8 @@ def sequence_last_step(input): ...@@ -539,6 +541,8 @@ def sequence_last_step(input):
x = fluid.data(name='x', shape=[None, 10], dtype='float32', lod_level=1) x = fluid.data(name='x', shape=[None, 10], dtype='float32', lod_level=1)
x_last_step = fluid.layers.sequence_last_step(input=x) x_last_step = fluid.layers.sequence_last_step(input=x)
""" """
check_variable_and_dtype(input, 'input', ['float32', 'float64'],
'sequence_last_step')
return sequence_pool(input=input, pool_type="last") return sequence_pool(input=input, pool_type="last")
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.framework import convert_np_dtype_to_dtype_, Program, program_guard
import paddle.fluid.core as core
import numpy as np
import copy
import unittest
import sys
sys.path.append("../")
from op_test import OpTest
class TestSequenceFirstStepOpError(unittest.TestCase):
    """Input-validation tests for ``fluid.layers.sequence_first_step``.

    Both cases expect the layer to raise ``TypeError`` while the graph is
    being built, i.e. before any operator is executed.
    """

    def test_errors(self):
        # Build into throw-away programs so the default main/startup
        # programs are not touched by this test.
        with program_guard(Program(), Program()):

            def test_Variable():
                # A raw numpy array is not a Variable and must be rejected.
                input_data = np.random.randint(1, 5, [4]).astype("int64")
                fluid.layers.sequence_first_step(input_data)

            self.assertRaises(TypeError, test_Variable)

            def test_input_dtype():
                # int64 is not an accepted input dtype (the layer checks
                # for float32/float64), so a Variable of that dtype must
                # be rejected as well.
                type_data = fluid.layers.data(
                    name='type_data',
                    shape=[7, 1],
                    append_batch_size=False,
                    dtype='int64',
                    lod_level=1)
                fluid.layers.sequence_first_step(type_data)

            self.assertRaises(TypeError, test_input_dtype)


if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.framework import convert_np_dtype_to_dtype_, Program, program_guard
import paddle.fluid.core as core
import numpy as np
import copy
import unittest
import sys
sys.path.append("../")
from op_test import OpTest
class TestSequenceLastStepOpError(unittest.TestCase):
    """Input-validation tests for ``fluid.layers.sequence_last_step``.

    Each case expects a ``TypeError`` to be raised at graph-construction
    time.
    """

    def test_errors(self):
        # Use fresh programs so nothing leaks into the default ones.
        with program_guard(Program(), Program()):
            # Case 1: the input is a numpy array rather than a Variable.
            with self.assertRaises(TypeError):
                ndarray_input = np.random.randint(1, 5, [4]).astype("int64")
                fluid.layers.sequence_last_step(ndarray_input)

            # Case 2: the input is a Variable, but its dtype (int64) is
            # one the layer is expected to reject.
            with self.assertRaises(TypeError):
                int64_input = fluid.layers.data(
                    name='type_data',
                    shape=[7, 1],
                    append_batch_size=False,
                    dtype='int64',
                    lod_level=1)
                fluid.layers.sequence_last_step(int64_input)


if __name__ == '__main__':
    unittest.main()
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.framework import convert_np_dtype_to_dtype_ from paddle.fluid.framework import convert_np_dtype_to_dtype_, Program, program_guard
import paddle.fluid.core as core import paddle.fluid.core as core
import numpy as np import numpy as np
import copy import copy
...@@ -154,5 +154,17 @@ class SequenceMaskTest5_tensor_attr(SequenceMaskTestBase_tensor_attr): ...@@ -154,5 +154,17 @@ class SequenceMaskTest5_tensor_attr(SequenceMaskTestBase_tensor_attr):
self.mask_dtype = 'float64' self.mask_dtype = 'float64'
class TestSequenceMaskOpError(unittest.TestCase):
    """Input-validation test for ``fluid.layers.sequence_mask``."""

    def test_errors(self):
        # Use fresh programs so nothing leaks into the default ones.
        with program_guard(Program(), Program()):
            ndarray_input = np.random.uniform(1, 5, [4]).astype("float32")

            # A numpy array is not a Variable, so building the op is
            # expected to fail with TypeError.
            with self.assertRaises(TypeError):
                fluid.layers.sequence_mask(ndarray_input, maxlen=4)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -19,6 +19,8 @@ import unittest ...@@ -19,6 +19,8 @@ import unittest
import numpy as np import numpy as np
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
import paddle.fluid as fluid
from paddle.fluid.framework import Program, program_guard
class TestBeamSearchDecodeOp(unittest.TestCase): class TestBeamSearchDecodeOp(unittest.TestCase):
...@@ -110,5 +112,46 @@ class TestBeamSearchDecodeOpGPU(TestBeamSearchDecodeOp): ...@@ -110,5 +112,46 @@ class TestBeamSearchDecodeOpGPU(TestBeamSearchDecodeOp):
self.place = core.CUDAPlace(0) self.place = core.CUDAPlace(0)
class TestBeamSearchDecodeOpError(unittest.TestCase):
    """Input-validation tests for ``fluid.layers.beam_search_decode``.

    Every case passes one invalid argument (``ids`` or ``scores``) and
    expects ``TypeError`` while the graph is being built.
    """

    def test_errors(self):
        # Use fresh programs so nothing leaks into the default ones.
        with program_guard(Program(), Program()):
            # `ids` is a numpy array instead of a Variable.
            with self.assertRaises(TypeError):
                ndarray_ids = np.random.randint(1, 5, [5, 1]).astype("int64")
                valid_scores = fluid.layers.create_array(dtype='float32')
                fluid.layers.beam_search_decode(
                    ndarray_ids, valid_scores, beam_size=5, end_id=0)

            # `scores` is a numpy array instead of a Variable.
            with self.assertRaises(TypeError):
                valid_ids = fluid.layers.create_array(dtype='int64')
                ndarray_scores = np.random.uniform(
                    1, 5, [5, 1]).astype("float32")
                fluid.layers.beam_search_decode(
                    valid_ids, ndarray_scores, beam_size=5, end_id=0)

            # `ids` is a Variable whose dtype is float32 rather than int64.
            with self.assertRaises(TypeError):
                float_ids = fluid.layers.create_array(dtype='float32')
                valid_scores = fluid.layers.create_array(dtype='float32')
                fluid.layers.beam_search_decode(
                    float_ids, valid_scores, beam_size=5, end_id=0)

            # `scores` is a Variable whose dtype is int64 rather than float32.
            with self.assertRaises(TypeError):
                valid_ids = fluid.layers.create_array(dtype='int64')
                int_scores = fluid.layers.create_array(dtype='int64')
                fluid.layers.beam_search_decode(
                    valid_ids, int_scores, beam_size=5, end_id=0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -19,6 +19,8 @@ from paddle.fluid.op import Operator, DynamicRecurrentOp ...@@ -19,6 +19,8 @@ from paddle.fluid.op import Operator, DynamicRecurrentOp
import paddle.fluid.core as core import paddle.fluid.core as core
import unittest import unittest
import numpy as np import numpy as np
import paddle.fluid as fluid
from paddle.fluid.framework import Program, program_guard
def create_tensor(scope, name, np_data): def create_tensor(scope, name, np_data):
...@@ -96,5 +98,108 @@ class BeamSearchOpTester(unittest.TestCase): ...@@ -96,5 +98,108 @@ class BeamSearchOpTester(unittest.TestCase):
tensor.set_lod(self.lod) tensor.set_lod(self.lod)
class TestBeamSearchOpError(unittest.TestCase):
    """Input-validation tests for ``fluid.layers.beam_search``.

    A set of valid arguments is built once; each case then replaces a
    single argument with an invalid value and expects ``TypeError`` while
    the graph is being built.
    """

    def test_errors(self):
        # Use fresh programs so nothing leaks into the default ones.
        with program_guard(Program(), Program()):
            pre_ids = fluid.data(
                name='pre_id', shape=[1], lod_level=2, dtype='int64')
            pre_scores = fluid.data(
                name='pre_scores', shape=[1], lod_level=2, dtype='float32')
            probs = fluid.data(name='probs', shape=[10000], dtype='float32')
            topk_scores, topk_indices = fluid.layers.topk(probs, k=4)
            flat_pre_scores = fluid.layers.reshape(pre_scores, shape=[-1])
            accu_scores = fluid.layers.elementwise_add(
                x=fluid.layers.log(x=topk_scores), y=flat_pre_scores, axis=0)

            def call_beam_search(**overrides):
                # Start from the valid arguments and swap in whichever
                # one the current case wants to invalidate.
                arguments = dict(
                    pre_ids=pre_ids,
                    pre_scores=pre_scores,
                    ids=topk_indices,
                    scores=accu_scores,
                    beam_size=4,
                    end_id=1)
                arguments.update(overrides)
                fluid.layers.beam_search(**arguments)

            # `pre_ids` is a numpy array instead of a Variable.
            with self.assertRaises(TypeError):
                call_beam_search(
                    pre_ids=np.random.randint(1, 5, [5, 1]).astype("int64"))

            # `pre_scores` is a numpy array instead of a Variable.
            with self.assertRaises(TypeError):
                call_beam_search(
                    pre_scores=np.random.uniform(1, 5,
                                                 [5, 1]).astype("float32"))

            # `ids` is a numpy array; it must be a Variable or None.
            with self.assertRaises(TypeError):
                call_beam_search(
                    ids=np.random.randint(1, 5, [5, 1]).astype("int64"))

            # `scores` is a numpy array instead of a Variable.
            with self.assertRaises(TypeError):
                call_beam_search(
                    scores=np.random.uniform(1, 5, [5, 1]).astype("float32"))

            # `pre_ids` is a Variable with dtype float32 rather than int64.
            with self.assertRaises(TypeError):
                float_pre_ids = fluid.data(
                    name='preids_type_data',
                    shape=[1],
                    lod_level=2,
                    dtype='float32')
                call_beam_search(pre_ids=float_pre_ids)

            # `pre_scores` is a Variable with dtype int64 rather than a
            # floating-point type.
            with self.assertRaises(TypeError):
                int_pre_scores = fluid.data(
                    name='prescores_type_data',
                    shape=[1],
                    lod_level=2,
                    dtype='int64')
                call_beam_search(pre_scores=int_pre_scores)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -541,7 +541,7 @@ def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size, ...@@ -541,7 +541,7 @@ def def_seq2seq_model(num_layers, hidden_size, dropout_prob, src_vocab_size,
loss = layers.unsqueeze(loss, axes=[2]) loss = layers.unsqueeze(loss, axes=[2])
max_tar_seq_len = layers.shape(target)[1] max_tar_seq_len = layers.shape(target)[1]
tar_mask = layers.sequence_mask( tar_mask = layers.sequence_mask(
target_length, maxlen=max_tar_seq_len, dtype="float") target_length, maxlen=max_tar_seq_len, dtype="float32")
loss = loss * tar_mask loss = loss * tar_mask
loss = layers.reduce_mean(loss, dim=[0]) loss = layers.reduce_mean(loss, dim=[0])
loss = layers.reduce_sum(loss) loss = layers.reduce_sum(loss)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册