From e09d042784082dc759a0d7a05e153e4b4a80e282 Mon Sep 17 00:00:00 2001
From: Huihuang Zheng
Date: Thu, 10 Oct 2019 16:33:52 +0800
Subject: [PATCH] [Cherry Pick] Cherry Pick fluid.data Related Changes into
 Release/1.6 (#20352)

* Add support for None for fluid.data (#20228)

* Refine Error Message for New Data API (#20204)

* Modify English doc of fluid.data (#20248)
---
 paddle/fluid/API.spec                   |  2 +-
 python/paddle/fluid/data.py             | 14 ++++--
 python/paddle/fluid/executor.py         | 18 ++++---
 .../test_feed_data_check_shape_type.py  | 50 +++++++++++++++++--
 4 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index d6e65f3e042..97832627ea6 100755
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -567,7 +567,7 @@ paddle.fluid.contrib.BasicLSTMUnit.sublayers (ArgSpec(args=['self', 'include_sub
 paddle.fluid.contrib.BasicLSTMUnit.train (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.contrib.basic_lstm (ArgSpec(args=['input', 'init_hidden', 'init_cell', 'hidden_size', 'num_layers', 'sequence_length', 'dropout_prob', 'bidirectional', 'batch_first', 'param_attr', 'bias_attr', 'gate_activation', 'activation', 'forget_bias', 'dtype', 'name'], varargs=None, keywords=None, defaults=(1, None, 0.0, False, True, None, None, None, None, 1.0, 'float32', 'basic_lstm')), ('document', 'fe4d0c3c55a162b8cfe10b05fabb7ce4'))
 paddle.fluid.contrib.ctr_metric_bundle (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'b68d12366896c41065fc3738393da2aa'))
-paddle.fluid.data (ArgSpec(args=['name', 'shape', 'dtype', 'lod_level'], varargs=None, keywords=None, defaults=('float32', 0)), ('document', '28ee6b35836449f44ce53467f0616137'))
+paddle.fluid.data (ArgSpec(args=['name', 'shape', 'dtype', 'lod_level'], varargs=None, keywords=None, defaults=('float32', 0)), ('document', 'a44fce9b5c8919bf5937a1cc0fe484ca'))
 paddle.fluid.dygraph.Layer ('paddle.fluid.dygraph.layers.Layer', ('document', 'a889d5affd734ede273e94d4257163ab'))
 paddle.fluid.dygraph.Layer.__init__ (ArgSpec(args=['self', 'name_scope', 'dtype'], varargs=None, keywords=None, defaults=(VarType.FP32,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.dygraph.Layer.add_parameter (ArgSpec(args=['self', 'name', 'parameter'], varargs=None, keywords=None, defaults=None), ('document', 'f35ab374c7d5165c3daf3bd64a5a2ec1'))
diff --git a/python/paddle/fluid/data.py b/python/paddle/fluid/data.py
index b90a681ed8e..26f24f029cd 100644
--- a/python/paddle/fluid/data.py
+++ b/python/paddle/fluid/data.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import numpy as np
+import six
 
 from . import core
 from .layer_helper import LayerHelper
@@ -41,7 +42,9 @@ def data(name, shape, dtype='float32', lod_level=0):
     Args:
         name (str): The name/alias of the variable, see :ref:`api_guide_Name`
             for more details.
-        shape (list|tuple): List|Tuple of integers declaring the shape.
+        shape (list|tuple): List|Tuple of integers declaring the shape. You can
+            set "None" at a dimension to indicate that the dimension can be of
+            any size; e.g., a changeable batch size can be set as "None".
         dtype (np.dtype|VarType|str, optional): The type of the data. Supported
             dtype: bool, float16, float32, float64, int8, int16, int32, int64,
             uint8. Default: float32
@@ -62,10 +65,10 @@ def data(name, shape, dtype='float32', lod_level=0):
           # User can only feed data of the same shape to x
          x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')
 
-          # Creates a variable with changable batch size -1.
+          # Creates a variable with a changeable batch size.
           # Users can feed data of any batch size into y,
           # but size of each data sample has to be [2, 1]
-          y = fluid.data(name='y', shape=[-1, 2, 1], dtype='float32')
+          y = fluid.data(name='y', shape=[None, 2, 1], dtype='float32')
 
           z = x + y
 
@@ -86,6 +89,11 @@ def data(name, shape, dtype='float32', lod_level=0):
     """
     helper = LayerHelper('data', **locals())
+
+    shape = list(shape)
+    for i in six.moves.range(len(shape)):
+        if shape[i] is None:
+            shape[i] = -1
+
     return helper.create_global_variable(
         name=name,
         shape=shape,
diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index 460f1eae741..6e8d07e2836 100644
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -22,6 +22,7 @@ import warnings
 import numpy as np
 from .wrapped_decorator import signature_safe_contextmanager
 import six
+from .data_feeder import convert_dtype
 from .framework import Program, default_main_program, Variable, convert_np_dtype_to_dtype_
 from . import core
 from . import compiler
@@ -204,13 +205,18 @@ def check_feed_shape_type(var, feed):
     """
     if var.desc.need_check_feed():
         if not dimension_is_compatible_with(feed.shape(), var.shape):
-            raise ValueError('Cannot feed value of shape %r for Variable %r, '
-                             'which has shape %r' %
-                             (feed.shape, var.name, var.shape))
+            raise ValueError(
+                'The fed Variable %r should have dimensions = %d, shape = '
+                '%r, but received fed shape %r' %
+                (var.name, len(var.shape), var.shape, feed.shape()))
         if not dtype_is_compatible_with(feed._dtype(), var.dtype):
-            raise ValueError('Cannot feed value of type %r for Variable %r, '
-                             'which has type %r' %
-                             (feed._dtype(), var.name, var.dtype))
+            var_dtype_format = convert_dtype(var.dtype) if isinstance(
+                var.dtype, core.VarDesc.VarType) else var.dtype
+            feed_dtype_format = convert_dtype(feed._dtype()) if isinstance(
+                feed._dtype(), core.VarDesc.VarType) else feed._dtype()
+            raise ValueError(
+                'The data type of fed Variable %r must be %r, but received %r'
+                % (var.name, var_dtype_format, feed_dtype_format))
     return True
diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
index 2489ae8e266..6cb48399292 100644
--- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
+++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py
@@ -21,6 +21,7 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.compiler as compiler
 import paddle.fluid.core as core
+import six
 import unittest
 
 os.environ['CPU_NUM'] = str(4)
@@ -65,25 +66,64 @@ class TestFeedData(unittest.TestCase):
         return in_data, label, loss
 
     def test(self):
-        for use_cuda in [True, False] if core.is_compiled_with_cuda(
-        ) else [False]:
+        for use_cuda in [True,
+                         False] if core.is_compiled_with_cuda() else [False]:
             for use_parallel_executor in [False, True]:
                 print('Test Parameters:'),
                 print({
                     'use_cuda': use_cuda,
                     'use_parallel_executor': use_parallel_executor,
                 })
+                # Test feeding without errors
                 self._test_feed_data_match_shape_type(use_cuda,
                                                       use_parallel_executor)
                 self._test_feed_data_contains_neg_one(use_cuda,
                                                       use_parallel_executor)
-                with self.assertRaises(ValueError):
+
+                # Test the exception messages when feeding with errors
+                batch_size = self._get_batch_size(use_cuda,
+                                                  use_parallel_executor)
+                if six.PY2:
+                    in_shape_tuple = (long(-1), long(3), long(4), long(8))
+                    feed_shape_list = [
+                        long(batch_size), long(3), long(4), long(5)
+                    ]
+                else:
+                    in_shape_tuple = (-1, 3, 4, 8)
+                    feed_shape_list = [batch_size, 3, 4, 5]
+
+                with self.assertRaises(ValueError) as shape_mismatch_err:
                     self._test_feed_data_shape_mismatch(use_cuda,
                                                         use_parallel_executor)
+                self.assertEqual(
+                    str(shape_mismatch_err.exception),
+                    "The fed Variable %r should have dimensions = %d, "
+                    "shape = %r, but received fed shape %r" %
+                    (u'data', len(in_shape_tuple), in_shape_tuple,
+                     feed_shape_list))
+
+                with self.assertRaises(ValueError) as dtype_mismatch_err:
+                    self._test_feed_data_dtype_mismatch(use_cuda,
+                                                        use_parallel_executor)
+                self.assertEqual(
+                    str(dtype_mismatch_err.exception),
+                    "The data type of fed Variable %r must be 'int64', but "
+                    "received 'float64'" % (u'label'))
+
+    def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor):
+        batch_size = self._get_batch_size(use_cuda, use_parallel_executor)
+        in_size = [batch_size, 3, 4, 5]
+        feed_in_data = np.random.uniform(
+            size=[batch_size, 3, 4, 5]).astype(np.float32)
+        label_size = [batch_size, 1]
+        feed_label = np.random.randint(
+            low=0, high=self.class_num,
+            size=[batch_size, 1]).astype(np.float64)
+        self._feed_data_in_executor(in_size, label_size, feed_in_data,
+                                    feed_label, use_cuda,
+                                    use_parallel_executor)
 
     def _test_feed_data_shape_mismatch(self, use_cuda, use_parallel_executor):
         batch_size = self._get_batch_size(use_cuda, use_parallel_executor)
-        in_size = [-1, 3, 4, 8]
+        in_size = [None, 3, 4, 8]
         feed_in_data = np.random.uniform(
             size=[batch_size, 3, 4, 5]).astype(np.float32)
         label_size = [-1, 1]
@@ -97,7 +137,7 @@ class TestFeedData(unittest.TestCase):
         in_size = [-1, 3, 4, 5]
         feed_in_data = np.random.uniform(
             size=[batch_size, 3, 4, 5]).astype(np.float32)
-        label_size = (-1, 1)
+        label_size = (None, 1)
         feed_label = np.random.randint(
             low=0, high=self.class_num, size=[batch_size, 1]).astype(np.int64)
         self._feed_data_in_executor(in_size, label_size, feed_in_data,
-- 
GitLab
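For reference, a minimal sketch of the behavior this patch adds, assuming a
Paddle 1.6 build with the patch applied. The program is illustrative only and
is not part of the patch; the variable names, the scale op, and the feed
arrays are invented for the example, and the quoted error text assumes the
message wording above.

    import numpy as np
    import paddle.fluid as fluid

    # "None" in the declared shape is rewritten to -1 internally, so the
    # batch dimension is left unconstrained.
    x = fluid.data(name='x', shape=[None, 2, 1], dtype='float32')
    y = fluid.layers.scale(x, scale=2.0)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # Any batch size is accepted; here we feed a batch of 8.
    out, = exe.run(fluid.default_main_program(),
                   feed={'x': np.ones((8, 2, 1), dtype=np.float32)},
                   fetch_list=[y])

    # Feeding float64 into a float32 variable triggers the refined error
    # from check_feed_shape_type, along the lines of:
    # "The data type of fed Variable 'x' must be 'float32', but received 'float64'"
    try:
        exe.run(fluid.default_main_program(),
                feed={'x': np.ones((8, 2, 1), dtype=np.float64)},
                fetch_list=[y])
    except ValueError as e:
        print(e)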