From 59b891200e60193d5f7a0f13a1869d1cbf9b3538 Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Mon, 7 Jun 2021 16:50:45 +0800
Subject: [PATCH] [NPU] add private api for memcpy_op (#33258)

* add private api for memcpy_op

* change place dtype

* add private python api for memcpy op
---
 python/paddle/fluid/layers/tensor.py          | 31 ++++++++--
 .../fluid/tests/unittests/test_memcpy_op.py   |  8 +++
 python/paddle/tensor/creation.py              | 61 +++++++++++++++++++
 3 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index a62217c628c..65cc745dbab 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -36,11 +36,32 @@ from paddle.utils import deprecated
 from .utils import check_shape
 
 __all__ = [
-    'create_tensor', 'create_parameter', 'create_global_var', 'cast',
-    'tensor_array_to_tensor', 'concat', 'sums', 'assign',
-    'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax',
-    'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite',
-    'range', 'linspace', 'zeros_like', 'ones_like', 'diag', 'eye', 'triu'
+    'create_tensor',
+    'create_parameter',
+    'create_global_var',
+    'cast',
+    'tensor_array_to_tensor',
+    'concat',
+    'sums',
+    'assign',
+    'fill_constant_batch_size_like',
+    'fill_constant',
+    'argmin',
+    'argmax',
+    'argsort',
+    'ones',
+    'zeros',
+    'reverse',
+    'has_inf',
+    'has_nan',
+    'isfinite',
+    'range',
+    'linspace',
+    'zeros_like',
+    'ones_like',
+    'diag',
+    'eye',
+    'triu',
 ]
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_memcpy_op.py b/python/paddle/fluid/tests/unittests/test_memcpy_op.py
index a089b33b8ea..38e9379bc16 100755
--- a/python/paddle/fluid/tests/unittests/test_memcpy_op.py
+++ b/python/paddle/fluid/tests/unittests/test_memcpy_op.py
@@ -171,6 +171,14 @@ class TestMemcpyOPError(unittest.TestCase):
                 fetch_list=[lod_tensor_var.name, pinned_var.name])
 
 
+class TestMemcpyApi(unittest.TestCase):
+    def test_api(self):
+        a = paddle.ones([1024, 1024])
+        b = paddle.tensor.creation._memcpy(a, paddle.CUDAPinnedPlace())
+        self.assertEqual(b.place.__repr__(), "CUDAPinnedPlace")
+        self.assertTrue(np.array_equal(a.numpy(), b.numpy()))
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py
index fb0244a4149..7f37ab488f6 100644
--- a/python/paddle/tensor/creation.py
+++ b/python/paddle/tensor/creation.py
@@ -1054,3 +1054,64 @@ def assign(x, output=None):
     check_type(x, 'x', (Variable, np.ndarray, list, tuple, float, int, bool),
                'assign')
     return tensor.assign(x, output)
+
+
+#NOTE(zhiqiu): not public
+def _memcpy(input, place=None, output=None):
+    """
+
+    The OP copies the :attr:`input` to the :attr:`output`.
+    NOTE: currently, only support CUDAPlace <-> CUDAPinnedPlace or NPUPlace <-> CPUPlace.
+
+    Parameters:
+        input (Tensor): A tensor. Its data type supports float16, uint16, float32, float64, int32, int64, uint8 and bool.
+        place (Place, optional): Target place for the output. Default: None.
+        output (Tensor, optional): A tensor. If :attr:`output` is None, a new tensor will
+            be created as :attr:`output`. Default: None.
+
+    Returns:
+        Tensor: A tensor with the same shape, data type and value as :attr:`input`.
+
+    Examples:
+        .. code-block:: python
+
+          import paddle
+          import numpy as np
+          data = paddle.full(shape=[3, 2], fill_value=2.5, dtype='float64') # [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]]
+          result = paddle.tensor.creation._memcpy(data, place=paddle.CPUPlace())  # result = [[2.5, 2.5], [2.5, 2.5], [2.5, 2.5]]
+    """
+    helper = LayerHelper('memcpy', **locals())
+    check_type(input, 'input', (Variable), 'memcpy')
+
+    if isinstance(input, (Variable, core.VarBase)):
+        check_dtype(input.dtype, 'input', [
+            'float16', 'uint16', 'float32', 'float64', 'int32', 'int64',
+            'uint8', 'bool'
+        ], 'memcpy', '(When the type of input in memcpy is Variable.)')
+    if output is None:
+        output = helper.create_variable_for_type_inference(dtype=input.dtype)
+
+    dst_place_type = -1
+    if place is None:
+        dst_place_type = -1
+    else:
+        p = core.Place()
+        p.set_place(place)
+        if p.is_cpu_place():
+            dst_place_type = 0
+        elif p.is_gpu_place():
+            dst_place_type = 1
+        elif p.is_cuda_pinned_place():
+            dst_place_type = 2
+        elif p.is_xpu_place():
+            dst_place_type = 3
+        elif p.is_npu_place():
+            dst_place_type = 4
+
+    attrs = {'dst_place_type': dst_place_type}
+    helper.append_op(
+        type='memcpy',
+        inputs={'X': [input]},
+        outputs={'Out': [output]},
+        attrs=attrs)
+    return output
-- 
GitLab