From bda7eab77d01bde2402fb011d6efdffbe9955847 Mon Sep 17 00:00:00 2001
From: liuwei1031 <46661762+liuwei1031@users.noreply.github.com>
Date: Thu, 26 Sep 2019 18:59:57 +0800
Subject: [PATCH] improve the error message when handling ndarray with
 unsupported dtype  (#19949)

* improve error message when passing ndarray with object dtype

* improve message format

* change assert to raise TypeError

* remind user how to locate the irregular data instead of printing

* add unittest for input array type check
---
 python/paddle/fluid/reader.py                 | 12 ++++++
 .../unittests/test_py_reader_error_msg.py     | 39 +++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py

diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py
index f10a0ed5548..769735bb391 100644
--- a/python/paddle/fluid/reader.py
+++ b/python/paddle/fluid/reader.py
@@ -451,6 +451,17 @@ class GeneratorLoader(DataLoaderBase):
             assert not self._iterable, "reset() cannot be called when DataLoader is iterable"
             self._reset()
 
+    @classmethod
+    def _check_input_array(cls, item):
+        """Raise TypeError if np.array(item) has object dtype (irregular data)."""
+        if np.array(item).dtype == np.object_:
+            raise TypeError((
+                "\n\tFailed to convert input data to a regular ndarray :\n\t* Usually "
+                "this means the input data contains nested lists with different lengths. "
+                "\n\t* Check the reader function passed to 'decorate_batch_generator'"
+                " to locate the data that causes this issue.\n\t* Please consider using "
+                "'fluid.create_lod_tensor' to convert it to a LoD-Tensor."))
+
     def _start(self):
         def __thread_main__():
             try:
@@ -458,6 +469,7 @@ class GeneratorLoader(DataLoaderBase):
                     array = core.LoDTensorArray()
                     for item in tensors:
                         if not isinstance(item, core.LoDTensor):
+                            self._check_input_array(item)
                             tmp = core.LoDTensor()
                             tmp.set(item, core.CPUPlace())
                             item = tmp
diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py
new file mode 100644
index 00000000000..4c45908c5c3
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_py_reader_error_msg.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import unittest
+import numpy as np
+
+
+class TestPyReaderErrorMsg(unittest.TestCase):
+    """Exercises GeneratorLoader._check_input_array on nested array input."""
+
+    def test_check_input_array(self):
+        """Equal-length rows pass silently; ragged rows raise TypeError."""
+        check = fluid.reader.GeneratorLoader._check_input_array
+
+        def make_rows(*lengths):
+            # One random integer vector per requested length.
+            return [np.random.randint(100, size=[n]) for n in lengths]
+
+        # Rows of identical length: the call must return without raising.
+        check(make_rows(2, 2, 2))
+        # Rows of differing lengths: the check is expected to reject them.
+        with self.assertRaises(TypeError):
+            check(make_rows(2, 1, 3))
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab