From ec970f12703a18b22a2c471f46cac376e358f477 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Thu, 20 Jun 2019 16:15:10 +0800 Subject: [PATCH] Fix create_lod_tensor (#18196) * fix_create_lod_tensor, test=develop * remove program_guard import,test=develop * fix windows numpy default int32 error, test=develop --- python/paddle/fluid/lod_tensor.py | 29 +++++++++++++++----- python/paddle/fluid/tests/test_lod_tensor.py | 15 ++++++++-- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index 160b28d69c..3cfd6ff549 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -15,6 +15,7 @@ from __future__ import print_function from . import core +from .data_feeder import DataToLoDTensorConverter import numpy as np __all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] @@ -71,19 +72,33 @@ def create_lod_tensor(data, recursive_seq_lens, place): if isinstance(data, core.LoDTensor): return create_lod_tensor(np.array(data), recursive_seq_lens, place) elif isinstance(data, list): - # When input data is a list, it only deal with the case where the base element - # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated - # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number - # of words or other indexes in the sequence. 
+ # dtype and shape are not important here; + # we only want to reuse the code of DataToLoDTensorConverter + converter = DataToLoDTensorConverter( + place=place, + lod_level=len(recursive_seq_lens), + shape=[], + dtype=core.VarDesc.VarType.FP32) + new_recursive_seq_lens = [] for seq in data: new_recursive_seq_lens.append(len(seq)) + converter.feed(seq) + assert [ new_recursive_seq_lens ] == recursive_seq_lens, "data and recursive_seq_lens do not match" - flattened_data = np.concatenate(data, axis=0) - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - return create_lod_tensor(flattened_data, recursive_seq_lens, place) + + arr = np.array(converter.data) + + # FIXME(zjl): the original logic of create_lod_tensor appended + # 1 to the shape. That may not be the right approach, but for now we + # follow the previous logic + arr = arr.reshape(arr.shape + (1, )) + tensor = core.LoDTensor() + tensor.set(arr, place) + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + return tensor elif isinstance(data, np.ndarray): tensor = core.LoDTensor() tensor.set(data, place) diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index 9bd343c103..a3eae5a3c8 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -58,7 +58,8 @@ class TestLoDTensor(unittest.TestCase): def test_create_lod_tensor(self): # Create LoDTensor from a list - data = [[1, 2, 3], [3, 4]] + data = [[np.int64(1), np.int64(2), np.int64(3)], + [np.int64(3), np.int64(4)]] wrong_recursive_seq_lens = [[2, 2]] correct_recursive_seq_lens = [[3, 2]] self.assertRaises(AssertionError, create_lod_tensor, data, @@ -67,13 +68,23 @@ class TestLoDTensor(unittest.TestCase): fluid.CPUPlace()) self.assertEqual(tensor.recursive_sequence_lengths(), correct_recursive_seq_lens) + self.assertEqual(tensor._dtype(), core.VarDesc.VarType.INT64) + self.assertEqual(tensor.shape(), [5, 1]) + self.assertTrue( + 
np.array_equal( + np.array(tensor), + np.array([1, 2, 3, 3, 4]).reshape(tensor.shape()).astype( + 'int64'))) # Create LoDTensor from numpy array - data = np.random.random([10, 1]) + data = np.random.random([10, 1]).astype('float64') recursive_seq_lens = [[2, 1], [3, 3, 4]] tensor = create_lod_tensor(data, recursive_seq_lens, fluid.CPUPlace()) self.assertEqual(tensor.recursive_sequence_lengths(), recursive_seq_lens) + self.assertEqual(tensor._dtype(), core.VarDesc.VarType.FP64) + self.assertEqual(tensor.shape(), [10, 1]) + self.assertTrue(np.array_equal(np.array(tensor), data)) # Create LoDTensor from another LoDTensor, they are differnt instances new_recursive_seq_lens = [[2, 2, 1], [1, 2, 2, 3, 2]] -- GitLab