From ec970f12703a18b22a2c471f46cac376e358f477 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Thu, 20 Jun 2019 16:15:10 +0800
Subject: [PATCH] Fix create_lod_tensor  (#18196)

* fix_create_lod_tensor, test=develop

* remove program_guard import,test=develop

* fix windows numpy default int32 error, test=develop
---
 python/paddle/fluid/lod_tensor.py            | 29 +++++++++++++++-----
 python/paddle/fluid/tests/test_lod_tensor.py | 15 ++++++++--
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py
index 160b28d69c..3cfd6ff549 100644
--- a/python/paddle/fluid/lod_tensor.py
+++ b/python/paddle/fluid/lod_tensor.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 from . import core
+from .data_feeder import DataToLoDTensorConverter
 import numpy as np
 
 __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
@@ -71,19 +72,33 @@ def create_lod_tensor(data, recursive_seq_lens, place):
     if isinstance(data, core.LoDTensor):
         return create_lod_tensor(np.array(data), recursive_seq_lens, place)
     elif isinstance(data, list):
-        # When input data is a list, it only deal with the case where the base element
-        # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated
-        # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number
-        # of words or other indexes in the sequence.
+        # dtype and shape are not important here;
+        # we only want to reuse the code of DataToLoDTensorConverter
+        converter = DataToLoDTensorConverter(
+            place=place,
+            lod_level=len(recursive_seq_lens),
+            shape=[],
+            dtype=core.VarDesc.VarType.FP32)
+
         new_recursive_seq_lens = []
         for seq in data:
             new_recursive_seq_lens.append(len(seq))
+            converter.feed(seq)
+
         assert [
             new_recursive_seq_lens
         ] == recursive_seq_lens, "data and recursive_seq_lens do not match"
-        flattened_data = np.concatenate(data, axis=0)
-        flattened_data = flattened_data.reshape([len(flattened_data), 1])
-        return create_lod_tensor(flattened_data, recursive_seq_lens, place)
+
+        arr = np.array(converter.data)
+
+        # FIXME(zjl): the original logic of create_lod_tensor appended a
+        # trailing 1 to the shape. This may not be the right behavior, but
+        # for now we simply follow the previous logic.
+        arr = arr.reshape(arr.shape + (1, ))
+        tensor = core.LoDTensor()
+        tensor.set(arr, place)
+        tensor.set_recursive_sequence_lengths(recursive_seq_lens)
+        return tensor
     elif isinstance(data, np.ndarray):
         tensor = core.LoDTensor()
         tensor.set(data, place)
diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py
index 9bd343c103..a3eae5a3c8 100644
--- a/python/paddle/fluid/tests/test_lod_tensor.py
+++ b/python/paddle/fluid/tests/test_lod_tensor.py
@@ -58,7 +58,8 @@ class TestLoDTensor(unittest.TestCase):
 
     def test_create_lod_tensor(self):
         # Create LoDTensor from a list
-        data = [[1, 2, 3], [3, 4]]
+        data = [[np.int64(1), np.int64(2), np.int64(3)],
+                [np.int64(3), np.int64(4)]]
         wrong_recursive_seq_lens = [[2, 2]]
         correct_recursive_seq_lens = [[3, 2]]
         self.assertRaises(AssertionError, create_lod_tensor, data,
@@ -67,13 +68,23 @@ class TestLoDTensor(unittest.TestCase):
                                    fluid.CPUPlace())
         self.assertEqual(tensor.recursive_sequence_lengths(),
                          correct_recursive_seq_lens)
+        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT64)
+        self.assertEqual(tensor.shape(), [5, 1])
+        self.assertTrue(
+            np.array_equal(
+                np.array(tensor),
+                np.array([1, 2, 3, 3, 4]).reshape(tensor.shape()).astype(
+                    'int64')))
 
         # Create LoDTensor from numpy array
-        data = np.random.random([10, 1])
+        data = np.random.random([10, 1]).astype('float64')
         recursive_seq_lens = [[2, 1], [3, 3, 4]]
         tensor = create_lod_tensor(data, recursive_seq_lens, fluid.CPUPlace())
         self.assertEqual(tensor.recursive_sequence_lengths(),
                          recursive_seq_lens)
+        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.FP64)
+        self.assertEqual(tensor.shape(), [10, 1])
+        self.assertTrue(np.array_equal(np.array(tensor), data))
 
         # Create LoDTensor from another LoDTensor, they are differnt instances
         new_recursive_seq_lens = [[2, 2, 1], [1, 2, 2, 3, 2]]
-- 
GitLab