From 6e3554e485390329293e07b17bbbcab7e7ecb05b Mon Sep 17 00:00:00 2001
From: Li Fuchen
Date: Mon, 11 May 2020 10:43:36 +0800
Subject: [PATCH] OP(warpctc, add_position_encoding, scaled_dot_product_attention) error message enhancement (#24261) (#24372)

* enhance add_position_encoding error message, test=develop

* enhance warpctc & scaled_dot_product_attention error message, test=develop

* modified error message and ctest of scaled_dot_product_attention, test=develop
---
 .../operators/add_position_encoding_op.cc     |  8 +-
 .../operators/add_position_encoding_op.h      | 35 +++++--
 paddle/fluid/operators/warpctc_op.cc          | 36 ++++---
 paddle/fluid/operators/warpctc_op.h           | 62 ++++++++----
 python/paddle/fluid/layers/loss.py            |  6 ++
 python/paddle/fluid/layers/nn.py              |  2 +
 python/paddle/fluid/nets.py                   | 33 ++++++-
 .../test_add_position_encoding_op.py          | 15 +++
 .../test_scaled_dot_product_attention.py      | 95 +++++++++++++++++++
 .../fluid/tests/unittests/test_warpctc_op.py  | 54 +++++++++++
 10 files changed, 292 insertions(+), 54 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py

diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc
index a821331673a..629fedba6e3 100644
--- a/paddle/fluid/operators/add_position_encoding_op.cc
+++ b/paddle/fluid/operators/add_position_encoding_op.cc
@@ -23,11 +23,9 @@ class AddPositionEncodingOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"),
-                   "X(Input) of add_position_encoding_op should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("Out"),
-        "Out(Output) of add_position_encoding_op should not be null.");
+    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AddPositionEncoding");
+    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out",
+                   "AddPositionEncoding");
 
     auto x_dims = ctx->GetInputDim("X");
     ctx->SetOutputDim("Out", x_dims);
diff --git a/paddle/fluid/operators/add_position_encoding_op.h b/paddle/fluid/operators/add_position_encoding_op.h
index 30d54e5cf98..71bd343cced 100644
--- a/paddle/fluid/operators/add_position_encoding_op.h
+++ b/paddle/fluid/operators/add_position_encoding_op.h
@@ -39,25 +39,40 @@ class AddPositionEncodingKernel : public framework::OpKernel<T> {
     int enc_size = 0;
 
     if (x_lod.empty()) {
-      PADDLE_ENFORCE(
-          x_dim.size() == 3UL,
-          "The input X of Add Position Encoding should be 3-D Tensor!");
+      PADDLE_ENFORCE_EQ(x_dim.size(), 3,
+                        platform::errors::InvalidArgument(
+                            "The input(X)'s dimension of AddPositionEncodingOp "
+                            "should be equal to "
+                            "3, but received %d. ",
+                            x_dim.size()));
       batch_size = x_dim[0];
       max_seq_len = x_dim[1];
       enc_size = x_dim[2];
     } else {
-      PADDLE_ENFORCE(
-          x_dim.size() == 2UL,
-          "The input X of Add Position Encoding should be 2-D LoDTensor!");
-      PADDLE_ENFORCE(
-          x_lod.size() == 1UL,
-          "The Add Position Encoding Op only supports lod_level == 1!");
+      PADDLE_ENFORCE_EQ(x_dim.size(), 2,
+                        platform::errors::InvalidArgument(
+                            "The input(X)'s dimension of AddPositionEncodingOp "
+                            "should be equal to "
+                            "2, but received %d. ",
+                            x_dim.size()));
+      PADDLE_ENFORCE_EQ(x_lod.size(), 1,
+                        platform::errors::InvalidArgument(
+                            "The input(X)'s lod level of AddPositionEncodingOp "
+                            "should be equal to "
+                            "1, but received %d. ",
+                            x_lod.size()));
+
       batch_size = x_lod[0].size() - 1;
       max_seq_len = -1;
       enc_size = x_dim[1];
     }
 
-    PADDLE_ENFORCE(enc_size % 2 == 0, "Only support even encode size!");
+    PADDLE_ENFORCE_EQ(enc_size % 2, 0,
+                      platform::errors::InvalidArgument(
+                          "The input(X)'s feature size of "
+                          "AddPositionEncodingOp must be even, "
+                          "but received an odd number: %d. ",
+                          enc_size));
 
     const int half_size = enc_size / 2;
     for (int i = 0; i < batch_size; ++i) {
diff --git a/paddle/fluid/operators/warpctc_op.cc b/paddle/fluid/operators/warpctc_op.cc
index 2ca2588470e..6758987713f 100644
--- a/paddle/fluid/operators/warpctc_op.cc
+++ b/paddle/fluid/operators/warpctc_op.cc
@@ -28,14 +28,11 @@ class WarpCTCOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("Logits"),
-                   "Input(Logits) of WarpCTCOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("Label"),
-                   "Input(Label) of WarpCTCOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("WarpCTCGrad"),
-                   "Output(WarpCTCGrad) of WarpCTCOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("Loss"),
-                   "Output(Loss) of WarpCTCOp should not be null.");
+    OP_INOUT_CHECK(ctx->HasInput("Logits"), "Input", "Logits", "WarpCTC");
+    OP_INOUT_CHECK(ctx->HasInput("Label"), "Input", "Label", "WarpCTC");
+    OP_INOUT_CHECK(ctx->HasOutput("WarpCTCGrad"), "Output", "WarpCTCGrad",
+                   "WarpCTC");
+    OP_INOUT_CHECK(ctx->HasOutput("Loss"), "Output", "Loss", "WarpCTC");
 
     auto logits_dims = ctx->GetInputDim("Logits");
     int blank = ctx->Attrs().Get<int>("blank");
@@ -47,9 +44,18 @@ class WarpCTCOp : public framework::OperatorWithKernel {
       sequence_width =
          static_cast<int>(framework::product(logits_dims) / logits_dims[0]);
     }
-    PADDLE_ENFORCE((blank >= 0) && (blank < sequence_width),
-                   "The value of Attr(blank) should be in interval [0, %d).",
-                   sequence_width);
+
+    PADDLE_ENFORCE_GE(
+        blank, 0, platform::errors::InvalidArgument(
+                      "The value of Attr(blank) should be in interval [0, %d), "
+                      "but received %d",
+                      sequence_width, blank));
+    PADDLE_ENFORCE_LT(
+        blank, sequence_width,
+        platform::errors::InvalidArgument(
+            "The value of Attr(blank) should be in interval [0, %d), "
+            "but received %d",
+            sequence_width, blank));
     // TODO(liuyiqun): it is tricky to set the wrong dimension here.
     ctx->SetOutputDim("Loss", {-1, 1});
@@ -160,10 +166,10 @@ class WarpCTCGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("WarpCTCGrad"),
-                   "Input(WarpCTCGrad) of WarpCTCGradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("Logits")),
-                   "Output(Logits@GRAD) of WarpCTCGradOp should not be null.");
+    OP_INOUT_CHECK(ctx->HasInput("WarpCTCGrad"), "Input", "WarpCTCGrad",
+                   "WarpCTCGrad");
+    OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Logits")), "Output",
+                   "WarpCTCGrad", "WarpCTCGrad");
     ctx->SetOutputDim(framework::GradVarName("Logits"),
                       ctx->GetInputDim("Logits"));
     ctx->ShareLoD("Logits", /*->*/ framework::GradVarName("Logits"));
diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h
index 7cde263f6da..951a258fd21 100644
--- a/paddle/fluid/operators/warpctc_op.h
+++ b/paddle/fluid/operators/warpctc_op.h
@@ -65,13 +65,18 @@ class WarpCTCFunctor {
     ctcStatus_t status = platform::dynload::get_workspace_size(
         cpu_label_lengths, cpu_input_lengths, static_cast<int>(sequence_width),
         static_cast<int>(num_sequences), options_, &workspace_bytes);
-    PADDLE_ENFORCE_EQ(CTC_STATUS_SUCCESS, status,
-                      "warp-ctc [version %d] Error in get_workspace_size: ",
-                      warpctc_version_,
-                      platform::dynload::ctcGetStatusString(status));
-    PADDLE_ENFORCE_GT(workspace_bytes, 0UL,
-                      "Bytes of workspace got by warp-ctc function, "
-                      "get_workspace_size(), should be larger than 0.");
+
+    PADDLE_ENFORCE_EQ(
+        CTC_STATUS_SUCCESS, status,
+        platform::errors::PreconditionNotMet(
+            "warp-ctc [version %d] Error in get_workspace_size: %s",
+            warpctc_version_, platform::dynload::ctcGetStatusString(status)));
+    PADDLE_ENFORCE_GT(
+        workspace_bytes, 0UL,
+        platform::errors::InvalidArgument(
+            "Bytes of workspace got by warp-ctc function, "
+            "get_workspace_size() should be larger than 0, but received %d",
+            workspace_bytes));
 
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     size_t workspace_elements = workspace_bytes / sizeof(float) + 1UL;
@@ -88,10 +93,12 @@ class WarpCTCFunctor {
         input, gradient, cpu_labels, cpu_label_lengths, cpu_input_lengths,
         static_cast<int>(sequence_width), static_cast<int>(num_sequences), cpu_loss,
         workspace_data, options_);
-    PADDLE_ENFORCE_EQ(CTC_STATUS_SUCCESS, status,
-                      "warp-ctc [version %d] Error in compute_ctc_loss: ",
-                      warpctc_version_,
-                      platform::dynload::ctcGetStatusString(status));
+
+    PADDLE_ENFORCE_EQ(
+        CTC_STATUS_SUCCESS, status,
+        platform::errors::PreconditionNotMet(
+            "warp-ctc [version %d] Error in compute_ctc_loss: %s",
+            warpctc_version_, platform::dynload::ctcGetStatusString(status)));
   }
 
  protected:
@@ -156,23 +163,40 @@ class WarpCTCKernel : public framework::OpKernel<T> {
             labels_length_cpu.data<int64_t>()[i]);
       }
     } else {
+      PADDLE_ENFORCE_GT(logits->NumLevels(), 0UL,
+                        platform::errors::InvalidArgument(
+                            "Input(Logits) Tensor of WarpCTC "
+                            "does not contain LoD information."));
+      PADDLE_ENFORCE_GT(label->NumLevels(), 0UL,
+                        platform::errors::InvalidArgument(
+                            "Input(Label) Tensor of WarpCTC "
+                            "does not contain LoD information."));
+
       logits_lod = framework::ToAbsOffset(logits->lod())[0];
       auto logits_dims = logits->dims();
+
       PADDLE_ENFORCE_EQ(
           logits_dims[0], static_cast<int64_t>(logits_lod.back()),
-          "The first dimension of Input(Logits) should be equal to "
-          "the sum of all sequences' lengths.");
+          platform::errors::InvalidArgument(
+              "The first dimension of Input(Logits) should be equal to "
+              "the sum of all sequences' lengths = %d, but received %d.",
+              static_cast<int64_t>(logits_lod.back()), logits_dims[0]));
 
       label_lod = framework::ToAbsOffset(label->lod())[0];
       auto label_dims = label->dims();
-      PADDLE_ENFORCE_EQ(
-          label_dims[0], label->numel(),
-          "The width of each timestep in Input(Label) should be 1.");
+      PADDLE_ENFORCE_EQ(label_dims[1], 1,
+                        platform::errors::InvalidArgument(
+                            "The last dimension of Input(Label) should be 1, "
+                            "but received %d",
+                            label_dims[1]));
 
       num_sequences = logits_lod.size() - 1;
-      PADDLE_ENFORCE_EQ(num_sequences, label_lod.size() - 1,
-                        "The number of sequences of Input(Logits) should be "
-                        "equal to that of Input(Label).");
+      PADDLE_ENFORCE_EQ(
+          num_sequences, label_lod.size() - 1,
+          platform::errors::InvalidArgument(
+              "The number of sequences of Input(Logits) should be "
+              "equal to that of Input(Label) = %d, but received %d",
+              label_lod.size() - 1, num_sequences));
 
       sequence_width = logits->numel() / logits_dims[0];
       max_sequence_length = math::MaximumSequenceLength(logits_lod);
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index 825f6d7a180..7c0010a71a8 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -610,8 +610,14 @@ def warpctc(input,
         print(output)
     """
     helper = LayerHelper('warpctc', **locals())
+    check_variable_and_dtype(input, 'input', ['float32'], "warpctc")
+    check_variable_and_dtype(label, 'label', ['int32'], "warpctc")
     this_inputs = {'Logits': [input], 'Label': [label]}
     if input_length is not None and label_length is not None:
+        check_variable_and_dtype(input_length, 'LogitsLength', ['int64'],
+                                 "warpctc")
+        check_variable_and_dtype(label_length, 'LabelLength', ['int64'],
+                                 "warpctc")
         this_inputs['LogitsLength'] = [input_length]
         this_inputs['LabelLength'] = [label_length]
 
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index f35dbf66ad0..0ddc81abbfb 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -13880,6 +13880,8 @@ def add_position_encoding(input, alpha, beta, name=None):
     """
     helper = LayerHelper('add_position_encoding', **locals())
+    check_variable_and_dtype(input, 'input', ['float32', 'float64'],
+                             "add_position_encoding")
     dtype = helper.input_dtype()
     out = helper.create_variable_for_type_inference(dtype=dtype)
 
diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py
index 21def94ad1e..dcdb5817c0b 100644
--- a/python/paddle/fluid/nets.py
+++ b/python/paddle/fluid/nets.py
@@ -15,6 +15,7 @@ from __future__ import print_function
 
 import six
 from . import layers
+from .data_feeder import check_variable_and_dtype, convert_dtype
 
 __all__ = [
     "simple_img_conv_pool",
@@ -410,9 +411,10 @@ def scaled_dot_product_attention(queries,
             Multi-Head Attention.
 
     Raises:
+        TypeError: The dtype of inputs keys, values and queries should be the same.
         ValueError: Inputs queries, keys and values should all be 3-D tensors.
         ValueError: The hidden size of queries and keys should be the same.
-        ValueError: The max sequence length in query batch and in key batch should be the same.
+        ValueError: The max sequence length in value batch and in key batch should be the same.
         ValueError: The hidden size of keys must be divisible by the number of attention heads.
        ValueError: The hidden size of values must be divisible by the number of attention heads.
@@ -427,17 +429,38 @@ def scaled_dot_product_attention(queries,
         contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values)
         contexts.shape  # [3, 5, 10]
     """
+    check_variable_and_dtype(queries, 'queries', ['float32', 'float64'],
+                             "scaled_dot_product_attention")
+    check_variable_and_dtype(keys, 'keys', ['float32', 'float64'],
+                             "scaled_dot_product_attention")
+    check_variable_and_dtype(values, 'values', ['float32', 'float64'],
+                             "scaled_dot_product_attention")
+
+    if not (queries.dtype == keys.dtype == values.dtype):
+        raise TypeError(
+            "The dtype of keys, values and queries should be the same. "
+            "But received queries.dtype = %s, "
+            "keys.dtype = %s, values.dtype = %s." %
+            (convert_dtype(queries.dtype), convert_dtype(keys.dtype),
+             convert_dtype(values.dtype)))
+
     if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
         raise ValueError(
-            "Inputs queries, keys and values should all be 3-D tensors.")
+            "Inputs queries, keys and values should all be 3-D tensors. "
+            "But received len(queries.shape) = %d, "
+            "len(keys.shape) = %d, len(values.shape) = %d." %
+            (len(queries.shape), len(keys.shape), len(values.shape)))
 
     if queries.shape[-1] != keys.shape[-1]:
         raise ValueError(
-            "The hidden size of queries and keys should be the same.")
+            "The hidden size of queries and keys should be the same. "
+            "But received queries' hidden size = %d and keys' hidden size = %d."
+            % (queries.shape[-1], keys.shape[-1]))
     if keys.shape[-2] != values.shape[-2]:
         raise ValueError(
-            "The max sequence length in query batch and in key batch "
-            "should be the same.")
+            "The max sequence length in value batch and in key batch "
+            "should be the same. But received max sequence length in value batch "
+            "= %d, in key batch = %d." % (values.shape[-2], keys.shape[-2]))
     if keys.shape[-1] % num_heads != 0:
         raise ValueError("The hidden size of keys (%d) must be divisible "
                          "by the number of attention heads (%d)." %
diff --git a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py
index 5bccd2ab06a..908b458cb0a 100644
--- a/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py
+++ b/python/paddle/fluid/tests/unittests/test_add_position_encoding_op.py
@@ -16,6 +16,8 @@ import numpy as np
 import math
 import paddle.fluid.core as core
 from op_test import OpTest
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
 
 
 class TestAddPositionEncodingTensorOp(OpTest):
@@ -130,5 +132,18 @@ class TestAddPositionEncodingLoDTensorOp(OpTest):
             start += max_length
 
 
+class TestAddPositionEncodingOpError(unittest.TestCase):
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            input_data = np.random.random((4, 16, 8)).astype("float32")
+
+            def test_Variable():
+                # the input type must be Variable
+                fluid.layers.add_position_encoding(
+                    input=input_data, alpha=1.0, beta=1.0)
+
+            self.assertRaises(TypeError, test_Variable)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py
new file mode 100644
index 00000000000..a205189e4f9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_scaled_dot_product_attention.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +class TestScaledDotProductAttentionError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + queries = fluid.data( + name="queries", shape=[3, 5, 9], dtype="float32") + keys = fluid.data(name="keys", shape=[3, 6, 9], dtype="float32") + values = fluid.data( + name="values", shape=[3, 6, 10], dtype="float32") + + def test_queries_Variable(): + queries_data = np.random.rand(3, 5, 9).astype("float32") + fluid.nets.scaled_dot_product_attention(queries_data, keys, + values) + + self.assertRaises(TypeError, test_queries_Variable) + + def test_keys_Variable(): + keys_data = np.random.rand(3, 6, 9).astype("float32") + fluid.nets.scaled_dot_product_attention(queries, keys_data, + values) + + self.assertRaises(TypeError, test_keys_Variable) + + def test_values_Variable(): + values_data = np.random.rand(3, 6, 10).astype("float32") + fluid.nets.scaled_dot_product_attention(queries, keys, + values_data) + + self.assertRaises(TypeError, test_values_Variable) + + def test_diff_dtype(): + keys_error = fluid.data( + name="keys_error", shape=[3, 6, 9], dtype="float64") + values_error = fluid.data( + name="values_error", shape=[3, 6, 10], dtype="float64") + fluid.nets.scaled_dot_product_attention(queries, keys_error, + values_error) + + self.assertRaises(TypeError, test_diff_dtype) + + def test_diff_dim(): + keys_error_dim = fluid.data( + name="keys_error_dim", shape=[3, 6], dtype="float32") + values_error_dim = fluid.data( + name="values_error_dim", shape=[3], dtype="float32") + fluid.nets.scaled_dot_product_attention(queries, keys_error_dim, + values_error_dim) + + self.assertRaises(ValueError, test_diff_dim) + + def test_diff_hidden_size(): + queries_error_hs = fluid.data( + name="queries_error_hs", shape=[3, 5, 9], dtype="float32") + keys_error_hs = fluid.data( + name="keys_error_hs", shape=[3, 6, 10], dtype="float32") + fluid.nets.scaled_dot_product_attention(queries_error_hs, + keys_error_hs, values) + + self.assertRaises(ValueError, test_diff_hidden_size) + + def test_diff_max_len(): + keys_error_len = fluid.data( + name="keys_error_len", shape=[3, 7, 9], dtype="float32") + values_error_len = fluid.data( + name="values_error_len", shape=[3, 6, 10], dtype="float32") + fluid.nets.scaled_dot_product_attention(queries, keys_error_len, + values_error_len) + + self.assertRaises(ValueError, test_diff_max_len) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index 2d8180fe25d..449ac959188 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -19,6 +19,8 @@ import unittest import numpy as np from op_test import OpTest from test_softmax_op import stable_softmax +import 
paddle.fluid as fluid +from paddle.fluid import Program, program_guard CUDA_BLOCK_SIZE = 512 @@ -335,5 +337,57 @@ class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding): self.norm_by_times = False +class TestWarpCTCOpError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + logits = fluid.data( + name='logits', shape=[5, 16, 6], dtype='float32') + logits_length = fluid.data( + name='logits_length', shape=[None], dtype='int64') + label = fluid.data(name='label', shape=[16, 3], dtype='int32') + label_length = fluid.data( + name='labels_length', shape=[None], dtype='int64') + + def test_logits_Variable(): + logits_data = np.random.rand(5, 16, 6).astype("float32") + fluid.layers.warpctc( + input=logits_data, + label=label, + input_length=logits_length, + label_length=label_length) + + self.assertRaises(TypeError, test_logits_Variable) + + def test_label_Variable(): + label_data = np.random.randint(0, 5, [5, 1]).astype("int32") + fluid.layers.warpctc( + input=logits, + label=label_data, + input_length=logits_length, + label_length=label_length) + + self.assertRaises(TypeError, test_label_Variable) + + def test_logits_len_Variable(): + logits_length_data = np.array([5] * 16).astype("int64") + fluid.layers.warpctc( + input=logits, + label=label, + input_length=logits_length_data, + label_length=label_length) + + self.assertRaises(TypeError, test_logits_len_Variable) + + def test_label_len_Variable(): + label_length_data = np.array([3] * 16).astype("int64") + fluid.layers.warpctc( + input=logits, + label=label, + input_length=logits_length, + label_length=label_length_data) + + self.assertRaises(TypeError, test_label_len_Variable) + + if __name__ == "__main__": unittest.main() -- GitLab
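Usage note (editorial addition, not part of the patch): the sketch below is a minimal illustration of how the checks added above surface to users, assuming the paddle.fluid 1.x static-graph API exercised by the new unit tests; the variable names are illustrative only. With the added check_variable_and_dtype calls, passing a raw numpy array instead of a Variable to fluid.layers.warpctc now fails fast with a TypeError rather than with a less descriptive failure inside the C++ operator.

    import numpy as np
    import paddle.fluid as fluid

    # Illustrative only: mirrors TestWarpCTCOpError from the patch.
    logits_np = np.random.rand(5, 16, 6).astype("float32")  # numpy array, not a Variable
    label = fluid.data(name="label", shape=[16, 3], dtype="int32")

    try:
        fluid.layers.warpctc(input=logits_np, label=label)
    except TypeError as e:
        print("caught expected TypeError:", e)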