diff --git a/python/paddle/distributed/utils/moe_utils.py b/python/paddle/distributed/utils/moe_utils.py index cd7c0e758d4e0be678cff83684e92acd6a15cc4e..eb7e73c363bf2cde2020c3d1be444fc33307ad2b 100644 --- a/python/paddle/distributed/utils/moe_utils.py +++ b/python/paddle/distributed/utils/moe_utils.py @@ -71,7 +71,6 @@ def global_scatter( .. code-block:: python # required: distributed - import numpy as np import paddle from paddle.distributed import init_parallel_env init_parallel_env() @@ -79,17 +78,14 @@ def global_scatter( world_size = 2 d_model = 2 in_feat = d_model - local_input_buf = np.array([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]], \ - dtype=np.float32) + local_input_buf = paddle.to_tensor([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]], \ + dtype='float32', stop_gradient=False) if paddle.distributed.ParallelEnv().local_rank == 0: - local_count = np.array([2, 1, 1, 1]) - global_count = np.array([2, 1, 1, 1]) + local_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") + global_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") else: - local_count = np.array([1, 1, 2, 1]) - global_count = np.array([1, 1, 2, 1]) - local_input_buf = paddle.to_tensor(local_input_buf, dtype="float32", stop_gradient=False) - local_count = paddle.to_tensor(local_count, dtype="int64") - global_count = paddle.to_tensor(global_count, dtype="int64") + local_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + global_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") a = paddle.distributed.utils.global_scatter(local_input_buf, \ local_count, global_count) a.stop_gradient = False @@ -193,7 +189,6 @@ def global_gather( .. 
code-block:: python # required: distributed - import numpy as np import paddle from paddle.distributed import init_parallel_env init_parallel_env() @@ -201,17 +196,15 @@ def global_gather( world_size = 2 d_model = 2 in_feat = d_model - local_input_buf = np.array([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]],\ - dtype=np.float32) + local_input_buf = paddle.to_tensor([[1, 2],[3, 4],[5, 6],[7, 8],[9, 10]],\ + dtype='float32', stop_gradient=False) if paddle.distributed.ParallelEnv().local_rank == 0: - local_count = np.array([2, 1, 1, 1]) - global_count = np.array([2, 1, 1, 1]) + local_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") + global_count = paddle.to_tensor([2, 1, 1, 1], dtype="int64") else: - local_count = np.array([1, 1, 2, 1]) - global_count = np.array([1, 1, 2, 1]) - local_input_buf = paddle.to_tensor(local_input_buf, dtype="float32", stop_gradient=False) - local_count = paddle.to_tensor(local_count, dtype="int64") - global_count = paddle.to_tensor(global_count, dtype="int64") + local_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + global_count = paddle.to_tensor([1, 1, 2, 1], dtype="int64") + a = paddle.distributed.utils.global_gather(local_input_buf, local_count, global_count) print(a) # out for rank 0: [[1, 2], [3, 4], [7, 8], [1, 2], [7, 8]] diff --git a/python/paddle/fft.py b/python/paddle/fft.py index 8bc95cd37e9f240c985bb3dfee68aae5a0695eed..1e4ca9237469ba54628dfa965294d780a02f7743 100644 --- a/python/paddle/fft.py +++ b/python/paddle/fft.py @@ -521,26 +521,29 @@ def fftn(x, s=None, axes=None, norm="backward", name=None): .. 
code-block:: python - import numpy as np import paddle - x = np.mgrid[:4, :4, :4][1] - xp = paddle.to_tensor(x) - fftn_xp = paddle.fft.fftn(xp, axes=(1, 2)).numpy() - print(fftn_xp) - # [[[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + arr = paddle.arange(4, dtype="float64") + x = paddle.meshgrid(arr, arr, arr)[1] + + fftn_xp = paddle.fft.fftn(x, axes=(1, 2)) + print(fftn_xp.numpy()) + # [[[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]] - # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] + + # [[24.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+8.j 0.+0.j 0.+0.j 0.-0.j] # [-8.+0.j 0.+0.j 0.+0.j 0.-0.j] # [-8.-8.j 0.+0.j 0.+0.j 0.-0.j]]] @@ -901,15 +904,16 @@ def fft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python - import numpy as np import paddle - x = np.mgrid[:2, :2][1] - xp = paddle.to_tensor(x) - fft2_xp = paddle.fft.fft2(xp).numpy() + arr = paddle.arange(2, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + fft2_xp = paddle.fft.fft2(x) print(fft2_xp) - # [[ 2.+0.j -2.+0.j] - # [ 0.+0.j 0.+0.j]] + # Tensor(shape=[2, 2], dtype=complex128, place=Place(gpu:0), stop_gradient=True, + # [[ (2+0j), 0j ], + # [(-2+0j), 0j ]]) """ _check_at_least_ndim(x, 2) @@ -971,15 +975,16 @@ def ifft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python - import numpy as np import paddle - x = np.mgrid[:2, :2][1] - xp = paddle.to_tensor(x) - ifft2_xp = paddle.fft.ifft2(xp).numpy() + arr = paddle.arange(2, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + ifft2_xp = paddle.fft.ifft2(x) print(ifft2_xp) - # [[ 0.5+0.j -0.5+0.j] - # [ 0. 
+0.j 0. +0.j]] + # Tensor(shape=[2, 2], dtype=complex128, place=Place(gpu:0), stop_gradient=True, + # [[ (0.5+0j), 0j ], + # [(-0.5+0j), 0j ]]) """ _check_at_least_ndim(x, 2) if s is not None: @@ -1033,16 +1038,17 @@ def rfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. code-block:: python import paddle - import numpy as np - - x = paddle.to_tensor(np.mgrid[:5, :5][0].astype(np.float32)) - print(paddle.fft.rfft2(x)) - # Tensor(shape=[5, 3], dtype=complex64, place=CUDAPlace(0), stop_gradient=True, - # [[ (50+0j) , (1.1920928955078125e-07+0j) , 0j ], - # [(-12.5+17.204774856567383j) , (-9.644234211236835e-08+7.006946134424652e-08j) , 0j ], - # [(-12.500000953674316+4.061495304107666j) , (3.6837697336977726e-08-1.1337477445749755e-07j), 0j ], - # [(-12.500000953674316-4.061495304107666j) , (3.6837697336977726e-08+1.1337477445749755e-07j), 0j ], - # [(-12.5-17.204774856567383j) , (-9.644234211236835e-08-7.006946134424652e-08j) , 0j ]]) + + arr = paddle.arange(5, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + + result = paddle.fft.rfft2(x) + print(result.numpy()) + # [[ 50. +0.j 0. +0.j 0. +0.j ] + # [-12.5+17.20477401j 0. +0.j 0. +0.j ] + # [-12.5 +4.0614962j 0. +0.j 0. +0.j ] + # [-12.5 -4.0614962j 0. +0.j 0. +0.j ] + # [-12.5-17.20477401j 0. +0.j 0. +0.j ]] """ _check_at_least_ndim(x, 2) if s is not None: @@ -1192,13 +1198,20 @@ def ihfft2(x, s=None, axes=(-2, -1), norm="backward", name=None): .. 
code-block:: python - import numpy as np import paddle - x = np.mgrid[:5, :5][0].astype(np.float64) - xp = paddle.to_tensor(x) - ihfft2_xp = paddle.fft.ihfft2(xp).numpy() - print(ihfft2_xp) + arr = paddle.arange(5, dtype="float64") + x = paddle.meshgrid(arr, arr)[0] + print(x) + # Tensor(shape=[5, 5], dtype=float64, place=Place(gpu:0), stop_gradient=True, + # [[0., 0., 0., 0., 0.], + # [1., 1., 1., 1., 1.], + # [2., 2., 2., 2., 2.], + # [3., 3., 3., 3., 3.], + # [4., 4., 4., 4., 4.]]) + + ihfft2_xp = paddle.fft.ihfft2(x) + print(ihfft2_xp.numpy()) # [[ 2. +0.j 0. +0.j 0. +0.j ] # [-0.5-0.68819096j 0. +0.j 0. +0.j ] # [-0.5-0.16245985j 0. +0.j 0. +0.j ] @@ -1250,15 +1263,11 @@ def fftfreq(n, d=1.0, dtype=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) scalar_temp = 0.5 - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=scalar_temp) + fftfreq_xp = paddle.fft.fftfreq(5, d=scalar_temp) print(fftfreq_xp) - # Tensor(shape=[5], dtype=float32, place=CUDAPlace(0), stop_gradient=True, # [ 0. , 0.40000001, 0.80000001, -0.80000001, -0.40000001]) """ @@ -1301,13 +1310,10 @@ def rfftfreq(n, d=1.0, dtype=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) scalar_temp = 0.3 - n = x.size - rfftfreq_xp = paddle.fft.rfftfreq(n, d=scalar_temp) + rfftfreq_xp = paddle.fft.rfftfreq(5, d=scalar_temp) print(rfftfreq_xp) # Tensor(shape=[3], dtype=float32, place=CUDAPlace(0), stop_gradient=True, @@ -1343,15 +1349,17 @@ def fftshift(x, axes=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=0.3) - res = paddle.fft.fftshift(fftfreq_xp).numpy() + fftfreq_xp = paddle.fft.fftfreq(5, d=0.3) + print(fftfreq_xp) + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 0. 
, 0.66666669, 1.33333337, -1.33333337, -0.66666669]) + + res = paddle.fft.fftshift(fftfreq_xp) print(res) - # [-1.3333334 -0.6666667 0. 0.6666667 1.3333334] + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [-1.33333337, -0.66666669, 0. , 0.66666669, 1.33333337]) """ shape = paddle.shape(x) @@ -1386,15 +1394,17 @@ def ifftshift(x, axes=None, name=None): .. code-block:: python - import numpy as np import paddle - x = np.array([3, 1, 2, 2, 3], dtype=float) - n = x.size - fftfreq_xp = paddle.fft.fftfreq(n, d=0.3) - res = paddle.fft.ifftshift(fftfreq_xp).numpy() + fftfreq_xp = paddle.fft.fftfreq(5, d=0.3) + print(fftfreq_xp) + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 0. , 0.66666669, 1.33333337, -1.33333337, -0.66666669]) + + res = paddle.fft.ifftshift(fftfreq_xp) print(res) - # [ 1.3333334 -1.3333334 -0.6666667 0. 0.6666667] + # Tensor(shape=[5], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [ 1.33333337, -1.33333337, -0.66666669, 0. , 0.66666669]) """ shape = paddle.shape(x) diff --git a/python/paddle/sparse/nn/functional/activation.py b/python/paddle/sparse/nn/functional/activation.py index cbe2ddd0d79dbfbfef6c57b516f560cf7cd0e702..93c5e74014f3e0f0c931768116a89352deb7b009 100644 --- a/python/paddle/sparse/nn/functional/activation.py +++ b/python/paddle/sparse/nn/functional/activation.py @@ -87,28 +87,31 @@ def softmax(x, axis=-1, name=None): .. code-block:: python import paddle - import numpy as np paddle.seed(100) - mask = np.random.rand(3, 4) < 0.5 - np_x = np.random.rand(3, 4) * mask - # [[0. 0. 0.96823406 0.19722934] - # [0.94373937 0. 0.02060066 0.71456372] - # [0. 0. 0. 
0.98275049]] - - csr = paddle.to_tensor(np_x).to_sparse_csr() - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, - # 0.98275049]) + mask = paddle.rand((3, 4)) < 0.5 + x = paddle.rand((3, 4)) * mask + print(x) + # Tensor(shape=[3, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [[0.83438963, 0.70008713, 0. , 0.88831252], + # [0.02200012, 0. , 0.75432241, 0.65136462], + # [0.96088767, 0.82938021, 0.35367414, 0.86653489]]) + + csr = x.to_sparse_csr() + print(csr) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0 , 3 , 6 , 10], + # cols=[0, 1, 3, 0, 2, 3, 0, 1, 2, 3], + # values=[0.83438963, 0.70008713, 0.88831252, 0.02200012, 0.75432241, + # 0.65136462, 0.96088767, 0.82938021, 0.35367414, 0.86653489]) out = paddle.sparse.nn.functional.softmax(csr) - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, - # 1. ]) + print(out) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0 , 3 , 6 , 10], + # cols=[0, 1, 3, 0, 2, 3, 0, 1, 2, 3], + # values=[0.34132850, 0.29843223, 0.36023921, 0.20176248, 0.41964680, + # 0.37859070, 0.30015594, 0.26316854, 0.16354506, 0.27313042]) """ return _C_ops.sparse_softmax(x, axis) diff --git a/python/paddle/sparse/nn/layer/activation.py b/python/paddle/sparse/nn/layer/activation.py index 3ad856f69fbec139bf671d3f03e87abb56813ea8..91d5c198189dd9a490a62261cf2de8700f0d02ad 100644 --- a/python/paddle/sparse/nn/layer/activation.py +++ b/python/paddle/sparse/nn/layer/activation.py @@ -86,29 +86,32 @@ class Softmax(Layer): .. 
code-block:: python import paddle - import numpy as np - paddle.seed(100) - - mask = np.random.rand(3, 4) < 0.5 - np_x = np.random.rand(3, 4) * mask - # [[0. 0. 0.96823406 0.19722934] - # [0.94373937 0. 0.02060066 0.71456372] - # [0. 0. 0. 0.98275049]] - - csr = paddle.to_tensor(np_x).to_sparse_csr() - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.96823406, 0.19722934, 0.94373937, 0.02060066, 0.71456372, - # 0.98275049]) + paddle.seed(2022) + + mask = paddle.rand((3, 4)) < 0.7 + x = paddle.rand((3, 4)) * mask + print(x) + # Tensor(shape=[3, 4], dtype=float32, place=Place(gpu:0), stop_gradient=True, + # [[0.08325022, 0.27030438, 0. , 0.83883715], + # [0. , 0.95856029, 0.24004589, 0. ], + # [0.14500992, 0.17088132, 0. , 0. ]]) + + csr = x.to_sparse_csr() + print(csr) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 3, 5, 7], + # cols=[0, 1, 3, 1, 2, 0, 1], + # values=[0.08325022, 0.27030438, 0.83883715, 0.95856029, 0.24004589, + # 0.14500992, 0.17088132]) softmax = paddle.sparse.nn.Softmax() out = softmax(csr) - # Tensor(shape=[3, 4], dtype=paddle.float64, place=Place(gpu:0), stop_gradient=True, - # crows=[0, 2, 5, 6], - # cols=[2, 3, 0, 2, 3, 3], - # values=[0.68373820, 0.31626180, 0.45610887, 0.18119845, 0.36269269, - # 1. ]) + print(out) + # Tensor(shape=[3, 4], dtype=paddle.float32, place=Place(gpu:0), stop_gradient=True, + # crows=[0, 3, 5, 7], + # cols=[0, 1, 3, 1, 2, 0, 1], + # values=[0.23070428, 0.27815846, 0.49113727, 0.67227983, 0.32772022, + # 0.49353254, 0.50646752]) """ def __init__(self, axis=-1, name=None):