Unverified commit 1490aaa9 authored by ustiniankw, committed by GitHub

[cherry-pick2.4]en-docs warning&error fix (#48332)

* fixdocs, test=document_fix

* fixdocs, test=document_fix
Parent 3fa7a736
......@@ -28,12 +28,13 @@ _HYBRID_PARALLEL_GROUP = None
class ParallelMode(object):
"""
These are all the parallel modes currently supported:
- DATA_PARALLEL: Distribute input data to different devices.
- TENSOR_PARALLEL: Shards tensors in the network to different devices.
- PIPELINE_PARALLEL: Place different layers of the network on different devices.
- SHARDING_PARALLEL: Segment the model parameters, parameter gradients and optimizer states
corresponding to the parameters to each device.
- DATA_PARALLEL: Distribute input data to different devices.
- TENSOR_PARALLEL: Shards tensors in the network to different devices.
- PIPELINE_PARALLEL: Place different layers of the network on different devices.
- SHARDING_PARALLEL: Segment the model parameters, parameter gradients and optimizer states corresponding to the parameters to each device.
Examples:
.. code-block:: python
......@@ -43,6 +44,7 @@ class ParallelMode(object):
print(parallel_mode.DATA_PARALLEL) # 0
"""
DATA_PARALLEL = 0
TENSOR_PARALLEL = 1
PIPELINE_PARALLEL = 2
......@@ -50,14 +52,16 @@ class ParallelMode(object):
class CommunicateTopology(object):
def __init__(self,
hybrid_group_names=["data", "pipe", "sharding", "model"],
dims=[1, 1, 1, 1]):
def __init__(
self,
hybrid_group_names=["data", "pipe", "sharding", "model"],
dims=[1, 1, 1, 1],
):
self._parallel_names = hybrid_group_names
self._dims = dims
self.coordinate = collections.namedtuple('Coordinate',
self._parallel_names)
self.coordinate = collections.namedtuple(
'Coordinate', self._parallel_names
)
self._world_size = reduce(lambda x, y: x * y, self._dims)
ranges = [range(d) for d in self._dims]
......@@ -65,7 +69,8 @@ class CommunicateTopology(object):
self._coord2rank = dict(zip(all_coordinate, range(len(all_coordinate))))
self._rank2coord = dict(
zip(self._coord2rank.values(), self._coord2rank.keys()))
zip(self._coord2rank.values(), self._coord2rank.keys())
)
def get_hybrid_group_names(self):
return self._parallel_names
......@@ -90,7 +95,8 @@ class CommunicateTopology(object):
def get_axis_list(self, axis_name, index):
axis = self._parallel_names.index(axis_name)
ranks = [
self._coord2rank[coord] for coord in self._coord2rank.keys()
self._coord2rank[coord]
for coord in self._coord2rank.keys()
if coord[axis] == index
]
ranks.sort()
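As an aside, the coordinate-to-rank bookkeeping above is easy to reproduce standalone; a minimal sketch with a hypothetical 2-way data x 2-way model layout (names and dims are illustrative, not from this patch):

import collections
import itertools
from functools import reduce

names = ["data", "pipe", "sharding", "model"]
dims = [2, 1, 1, 2]  # hypothetical 2-way data parallel x 2-way model parallel

Coordinate = collections.namedtuple('Coordinate', names)
world_size = reduce(lambda x, y: x * y, dims)  # 4
all_coords = [
    Coordinate(*c) for c in itertools.product(*[range(d) for d in dims])
]
coord2rank = dict(zip(all_coords, range(len(all_coords))))

# get_axis_list-style filtering: all ranks whose "data" coordinate is 0.
axis = names.index("data")
print(sorted(r for c, r in coord2rank.items() if c[axis] == 0))  # [0, 1]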
......@@ -132,7 +138,6 @@ class CommunicateTopology(object):
class HybridCommunicateGroup(object):
def __init__(self, topology):
self.nranks = paddle.distributed.get_world_size()
self.global_rank = paddle.distributed.get_rank()
......@@ -148,10 +153,16 @@ class HybridCommunicateGroup(object):
self._sharding_parallel_id = self._get_sharding_parallel_id()
self.stage_id = self._get_pipe_parallel_id()
assert self._check_vaild_topo(
), "Here is an unreasonable topogy setting. world_size: {}, but" \
"mp_num: {}, sharding_num: {}, pp_num: {}, dp_num: {}".format(self.nranks,
self._mp_degree, self._sharding_degree, self._pp_degree, self._dp_degree)
assert self._check_vaild_topo(), (
"Here is an unreasonable topology setting. world_size: {}, but "
"mp_num: {}, sharding_num: {}, pp_num: {}, dp_num: {}".format(
self.nranks,
self._mp_degree,
self._sharding_degree,
self._pp_degree,
self._dp_degree,
)
)
# create comm group for data parallel
self._dp_group, self._dp_comm_group = self._set_comm_group("data")
......@@ -164,26 +175,43 @@ class HybridCommunicateGroup(object):
# create comm group for sharding parallel
self._sharding_group, self._sharding_comm_group = self._set_comm_group(
"sharding")
"sharding"
)
# create global group for check inf_nan / clip global norm
self._check_group, self._check_comm_group = self._set_check_group(
"data")
"data"
)
# create p2p group
self.is_first_stage = (self.stage_id == 0)
self.is_last_stage = (self.stage_id == (self._pp_degree - 1))
self.is_first_stage = self.stage_id == 0
self.is_last_stage = self.stage_id == (self._pp_degree - 1)
# create p2p_groups
if self._pp_degree > 1:
self._set_p2p_group()
debug_str = "HybridParallelInfo: rank_id: %d, mp_degree: %d, " \
"sharding_degree: %d, pp_degree: %d, dp_degree: %d" % (self.global_rank, self._mp_degree,
self._sharding_degree, self._pp_degree, self._dp_degree)
debug_str += ", mp_group: %s, sharding_group: %s, pp_group: %s, dp_group: %s, check/clip group: %s" % (
self._mp_group, self._sharding_group, self._pp_group,
self._dp_group, self._check_group)
debug_str = (
"HybridParallelInfo: rank_id: %d, mp_degree: %d, "
"sharding_degree: %d, pp_degree: %d, dp_degree: %d"
% (
self.global_rank,
self._mp_degree,
self._sharding_degree,
self._pp_degree,
self._dp_degree,
)
)
debug_str += (
", mp_group: %s, sharding_group: %s, pp_group: %s, dp_group: %s, check/clip group: %s"
% (
self._mp_group,
self._sharding_group,
self._pp_group,
self._dp_group,
self._check_group,
)
)
logger.info(debug_str)
global _HYBRID_PARALLEL_GROUP
......@@ -195,7 +223,12 @@ class HybridCommunicateGroup(object):
# adding its parallel logic within that parallelism
# when use sharding alone, it should have its own parallelism for its parallel logic
# TODO modify 3 others parallel to support sharding
if self._mp_degree == 1 and self._pp_degree == 1 and self._dp_degree == 1 and self._sharding_degree > 1:
if (
self._mp_degree == 1
and self._pp_degree == 1
and self._dp_degree == 1
and self._sharding_degree > 1
):
return ParallelMode.SHARDING_PARALLEL
elif self._mp_degree == 1 and self._pp_degree == 1:
return ParallelMode.DATA_PARALLEL
......@@ -206,7 +239,13 @@ class HybridCommunicateGroup(object):
return ParallelMode.PIPELINE_PARALLEL
def _check_vaild_topo(self):
return self._dp_degree * self._mp_degree * self._pp_degree * self._sharding_degree == self.nranks
return (
self._dp_degree
* self._mp_degree
* self._pp_degree
* self._sharding_degree
== self.nranks
)
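The check only asks that the four degrees factorize the world size; for example (hypothetical sizes, not from this patch):

# 8 ranks can host dp=2, mp=2, pp=2, sharding=1, since 2 * 2 * 2 * 1 == 8;
# dp=3 with mp=2 and pp=2 on 8 ranks would trip the assertion above.
assert 2 * 2 * 2 * 1 == 8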
def _set_comm_group(self, parallel_method="data"):
parallel_group = []
......@@ -268,14 +307,16 @@ class HybridCommunicateGroup(object):
self.prev_rank = prev_rank
next_group = paddle.distributed.new_group(
ranks=[curr_rank, next_rank])
ranks=[curr_rank, next_rank]
)
if self.global_rank == curr_rank:
self.send_next_group = next_group
elif self.global_rank == next_rank:
self.recv_prev_group = next_group
prev_group = paddle.distributed.new_group(
ranks=[prev_rank, curr_rank])
ranks=[prev_rank, curr_rank]
)
if self.global_rank == curr_rank:
self.send_prev_group = prev_group
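Each adjacent pair of pipeline stages gets its own two-rank group, one per direction; the rank arithmetic in isolation (a standalone sketch assuming a 4-stage ring, independent of Paddle):

pp_degree = 4  # hypothetical pipeline depth
for curr_rank in range(pp_degree):
    next_rank = (curr_rank + 1) % pp_degree
    prev_rank = (curr_rank - 1) % pp_degree
    # curr_rank shares next_group with next_rank (send_next / recv_prev)
    # and prev_group with prev_rank (send_prev / recv_next).
    print(curr_rank, next_rank, prev_rank)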
......@@ -339,7 +380,12 @@ class HybridCommunicateGroup(object):
return self._pp_comm_group
def get_p2p_groups(self):
return self.send_next_group, self.send_prev_group, self.recv_next_group, self.recv_prev_group
return (
self.send_next_group,
self.send_prev_group,
self.recv_next_group,
self.recv_prev_group,
)
# sharding parallel message:
def _get_sharding_parallel_id(self):
......@@ -363,23 +409,25 @@ class HybridCommunicateGroup(object):
return self._check_comm_group
def get_rank_from_stage(self, stage_id, **kwargs):
return self._topo.get_rank_from_stage(self.global_rank,
pipe=stage_id,
**kwargs)
return self._topo.get_rank_from_stage(
self.global_rank, pipe=stage_id, **kwargs
)
class _CommunicateGroup(object):
""" tmp for static """
"""tmp for static"""
def __init__(self):
global _HYBRID_PARALLEL_GROUP
_HYBRID_PARALLEL_GROUP = self
self.groups = dict()
def set_comm_group(self, group_name, group_rank, group_size, ring_id,
group_ranks):
group = paddle.distributed.collective.Group(group_rank, ring_id,
group_ranks)
def set_comm_group(
self, group_name, group_rank, group_size, ring_id, group_ranks
):
group = paddle.distributed.collective.Group(
group_rank, ring_id, group_ranks
)
self.groups[group_name] = group
def get_group(self, group_name):
......
......@@ -103,6 +103,7 @@ def _check_var_exists(var_name):
def init_parallel_env():
"""
Initialize parallel training environment in dynamic graph mode.
Note:
......@@ -118,6 +119,7 @@ def init_parallel_env():
Examples:
.. code-block:: python
# required: gpu
import paddle
import paddle.nn as nn
......@@ -158,6 +160,7 @@ def init_parallel_env():
if __name__ == '__main__':
dist.spawn(train)
"""
# 0. get env & check world size
......
This diff is collapsed.
......@@ -23,9 +23,9 @@ from ...log_helper import get_logger
__all__ = ['add_supported_layer']
_logger = get_logger(__name__,
logging.INFO,
fmt='%(asctime)s-%(levelname)s: %(message)s')
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def _default_pruning(weight_nparray, m, n, func_name, param_name):
......@@ -38,13 +38,17 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
exlude_cond_shape4 = len(shape) == 4 and shape[1] < m
if exlude_cond_shape2:
_logger.warning(
'{} is not pruned because the first dimension of {} is smaller than {}'
.format(param_name, shape, m))
'{} is not pruned because the first dimension of {} is smaller than {}'.format(
param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask
if exlude_cond_shape4:
_logger.warning(
'{} is not pruned because the second dimension of {} is smaller than {}'
.format(param_name, shape, m))
'{} is not pruned because the second dimension of {} is smaller than {}'.format(
param_name, shape, m
)
)
return weight_pruned_nparray, weight_sparse_mask
checked_func_name = sparsity.CheckMethod.get_checking_method(func_name)
......@@ -60,13 +64,13 @@ def _default_pruning(weight_nparray, m, n, func_name, param_name):
# sparsity/utils does row-major pruning. That is the reason we have to transpose weight
# matrices before invoking create_mask. Then we transpose the result mask to make
# sure its shape is the same as the input weight.
weight_sparse_mask = sparsity.create_mask(weight_nparray.T,
func_name=func_name,
n=n,
m=m).T
weight_sparse_mask = sparsity.create_mask(
weight_nparray.T, func_name=func_name, n=n, m=m
).T
weight_pruned_nparray = np.multiply(weight_nparray, weight_sparse_mask)
assert sparsity.check_sparsity(weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name), \
'Pruning {} weight matrix failure!!!'.format(param_name)
assert sparsity.check_sparsity(
weight_pruned_nparray.T, n=n, m=m, func_name=checked_func_name
), 'Pruning {} weight matrix failure!!!'.format(param_name)
return weight_pruned_nparray, weight_sparse_mask
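To see why the transposes are needed, the same column-wise pruning pattern can be reproduced with a stand-in row-major mask function (pure numpy, 2:4 for concreteness; `row_major_mask` is hypothetical, not the library helper):

import numpy as np

def row_major_mask(w):
    # Stand-in for sparsity.create_mask: keep the 2 largest of every 4
    # entries along each row (1D 2:4 pattern).
    flat = np.abs(w).reshape(-1, 4)
    mask = np.zeros_like(flat)
    largest = np.argsort(flat, axis=1)[:, 2:]  # indices of the 2 largest
    np.put_along_axis(mask, largest, 1.0, axis=1)
    return mask.reshape(w.shape)

w = np.arange(16, dtype=float).reshape(4, 4)
mask = row_major_mask(w.T).T  # transpose in, transpose back out
assert (np.count_nonzero(mask, axis=0) == 2).all()  # 2:4 along each column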
......@@ -78,28 +82,35 @@ supported_layers_and_prune_func_map = {}
def add_supported_layer(layer, pruning_func=None):
r"""
Add supported layers and its corresponding pruning function.
Args:
name (string|Layer): The name or type of layer, needed to support. If layer is `Layer` then
it would be turn to string internally. ASP would use this name to match parameter's name and call
its the corresponding pruning function.
name (string|Layer): The name or type of layer that needs support. If layer is `Layer`, then
it will be converted to a string internally. ASP uses this name to match parameter names and call
the corresponding pruning function.
pruning_func (function, optional): a function type which receives five arguments (weight_nparray,
m, n, func_name, param_name), weight_nparray is a nparray of weight, param_name is the name of weight,
m, n, and func_name, please see `prune_model` for details.
m, n, func_name, param_name). weight_nparray is an nparray of the weight, param_name is the name of the weight;
for m, n, and func_name, please see `prune_model` for details.
"""
name = None
if isinstance(layer, str):
name = layer
elif isinstance(layer, paddle.fluid.dygraph.layers.Layer):
name = paddle.fluid.dygraph.layers._convert_camel_to_snake(
type(layer).__name__)
type(layer).__name__
)
elif issubclass(layer, paddle.fluid.dygraph.layers.Layer):
name = paddle.fluid.dygraph.layers._convert_camel_to_snake(
layer.__name__)
layer.__name__
)
else:
assert "The type of layer should be string of Layer, but got {}!".format(
type(layer))
raise AssertionError(
"The type of layer should be string or Layer, but got {}!".format(
type(layer)
)
)
if pruning_func is None:
pruning_func = _default_pruning
_supported_layers_and_prune_func_map_lock.acquire()
......
......@@ -27,9 +27,16 @@ from itertools import permutations
import threading
__all__ = [
'calculate_density', 'check_mask_1d', 'get_mask_1d', 'check_mask_2d',
'get_mask_2d_greedy', 'get_mask_2d_best', 'create_mask', 'check_sparsity',
'MaskAlgo', 'CheckMethod'
'calculate_density',
'check_mask_1d',
'get_mask_1d',
'check_mask_2d',
'get_mask_2d_greedy',
'get_mask_2d_best',
'create_mask',
'check_sparsity',
'MaskAlgo',
'CheckMethod',
]
......@@ -76,8 +83,9 @@ class CheckMethod(Enum):
CheckMethod.get_checking_method(MaskAlgo.MASK_2D_BEST)
# CheckMethod.CHECK_2D
"""
assert isinstance(mask_algo, MaskAlgo), \
"mask_algo should be MaskAlgo type"
assert isinstance(
mask_algo, MaskAlgo
), "mask_algo should be MaskAlgo type"
if mask_algo == MaskAlgo.MASK_1D:
return CheckMethod.CHECK_1D
else:
......@@ -86,20 +94,25 @@ class CheckMethod(Enum):
def calculate_density(x):
r"""
Return the density of the input tensor.
Args:
x (nparray): The input tensor.
Returns:
float: The density of :attr:`x`.
float, The density of :attr:`x`.
Examples:
.. code-block:: python
import paddle
import numpy as np
x = np.array([[0, 1, 3, 0],
import paddle
import numpy as np
x = np.array([[0, 1, 3, 0],
[1, 1, 0, 1]])
paddle.incubate.asp.calculate_density(x) # 0.625
paddle.incubate.asp.calculate_density(x) # 0.625
"""
x_flattened = x.flatten()
return float(np.nonzero(x_flattened)[0].size) / x_flattened.size
......@@ -108,7 +121,7 @@ def calculate_density(x):
def _reshape_1d(mat, m):
r"""
Reshape the input 2D matrix to shape (-1, m).
If the second dimension of :attr:`mat` is not a multiples of :attr:`m`,
If the second dimension of :attr:`mat` is not a multiple of :attr:`m`,
then this function would pad the remainder with 0 before reshaping.
.. math::
......@@ -126,7 +139,7 @@ def _reshape_1d(mat, m):
remainder = mat.shape[1] % m
if mat.shape[1] % m > 0:
mat_padded = np.zeros((mat.shape[0], mat.shape[1] + (m - remainder)))
mat_padded[:, :mat.shape[1]] = mat
mat_padded[:, : mat.shape[1]] = mat
shape = mat_padded.shape
return mat_padded.reshape(-1, m), shape
else:
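For instance, with m=4 a matrix with 6 columns is padded to 8 before reshaping (a standalone numpy illustration of the padding branch above):

import numpy as np

mat, m = np.ones((2, 6)), 4
remainder = mat.shape[1] % m  # 2
mat_padded = np.zeros((mat.shape[0], mat.shape[1] + (m - remainder)))
mat_padded[:, : mat.shape[1]] = mat  # shape (2, 8), last two columns zero
print(mat_padded.reshape(-1, m).shape)  # (4, 4)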
......@@ -136,7 +149,7 @@ def _reshape_1d(mat, m):
def check_mask_1d(mat, n, m):
r"""
Check if every row of the input matrix :attr:`mat` is in 1D `n:m` sparse pattern.
This function would pad the second dimension of :attr:`mat` by zero
This function would pad the second dimension of :attr:`mat` by zero
to be a multiple of :attr:`m` if necessary.
1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block.
......@@ -179,8 +192,8 @@ def check_mask_1d(mat, n, m):
def get_mask_1d(mat, n, m):
r"""
Generate 1D `n:m` sparse pattern mask of the input matrix :attr:`mat`
in row-directory. This function would pad the second dimension of :attr:`mat`
Generate 1D `n:m` sparse pattern mask of the input matrix :attr:`mat`
in the row direction. This function would pad the second dimension of :attr:`mat`
by zero to be a multiple of :attr:`m` before mask generation.
1D `n:m` sparse pattern: At least :attr:`n` zeros in every :math:`1 \times m` block.
......@@ -213,7 +226,7 @@ def get_mask_1d(mat, n, m):
min_order_indices = np.argsort(np.absolute(sub_mat))
mask_flattern[i, min_order_indices[:n].tolist()] = 0
mask_flattern = mask_flattern.reshape(shape)
mask[:, :] = mask_flattern[:, :mat.shape[1]]
mask[:, :] = mask_flattern[:, : mat.shape[1]]
return mask
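Concretely, for each 1 x 4 block the two smallest magnitudes are zeroed; the per-block selection replayed in plain numpy (a sketch, not a call to the helper itself):

import numpy as np

block = np.array([0.1, -2.0, 0.3, 4.0])  # one 1x4 block, n=2, m=4
mask = np.ones(4)
mask[np.argsort(np.absolute(block))[:2]] = 0
print(mask)  # [0. 1. 0. 1.] -> keeps -2.0 and 4.0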
......@@ -239,12 +252,12 @@ def _reshape_2d(mat, m):
remainder_0 = mat.shape[0] % m
remainder_1 = mat.shape[1] % m
new_shape = (mat.shape[0] if remainder_0 == 0 \
else mat.shape[0] + (m - remainder_0),
mat.shape[1] if remainder_1 == 0 \
else mat.shape[1] + (m - remainder_1))
new_shape = (
mat.shape[0] if remainder_0 == 0 else mat.shape[0] + (m - remainder_0),
mat.shape[1] if remainder_1 == 0 else mat.shape[1] + (m - remainder_1),
)
mat_padded = np.zeros(new_shape)
mat_padded[:mat.shape[0], :mat.shape[1]] = mat
mat_padded[: mat.shape[0], : mat.shape[1]] = mat
mat_flattern = np.empty(new_shape).reshape(-1, m * m)
curr_idx = 0
......@@ -252,9 +265,9 @@ def _reshape_2d(mat, m):
row_end = row_start + m
for col_start in range(0, mat_padded.shape[1], m):
col_end = col_start + m
sub_mat = np.squeeze(mat_padded[row_start:row_end, \
col_start:col_end] \
.reshape(-1))
sub_mat = np.squeeze(
mat_padded[row_start:row_end, col_start:col_end].reshape(-1)
)
mat_flattern[curr_idx] = sub_mat
curr_idx += 1
return mat_flattern, mat_padded.shape
......@@ -263,10 +276,10 @@ def _reshape_2d(mat, m):
def check_mask_2d(mat, n, m):
r"""
Check if every :math:`m \times m` block of the input matrix :attr:`mat` is in 2D `n:m` sparse pattern.
This function would pad each dimension of :attr:`mat` by zero to be a multiples of
This function would pad each dimension of :attr:`mat` by zero to be a multiple of
:attr:`m` if necessary.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column.
Args:
......@@ -304,18 +317,19 @@ def check_mask_2d(mat, n, m):
mat_padded, shape = _reshape_2d(mat, m)
for sub_mat in mat_padded:
sub_mask = np.absolute(np.squeeze(sub_mat.reshape(m, m))) > 0
if (np.sum(np.sum(sub_mask, axis=1) > (m-n)) != 0) and \
(np.sum(np.sum(sub_mask, axis=0) > (m-n)) != 0):
if (np.sum(np.sum(sub_mask, axis=1) > (m - n)) != 0) and (
np.sum(np.sum(sub_mask, axis=0) > (m - n)) != 0
):
return False
return True
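So a 4 x 4 block passes the 2:4 check only if every row and every column carries at most m - n = 2 nonzeros; for instance:

import numpy as np

block = np.array([[1, 0, 1, 0],
                  [0, 1, 0, 1],
                  [1, 0, 1, 0],
                  [0, 1, 0, 1]], dtype=float)
nnz = block != 0
assert (nnz.sum(axis=0) <= 2).all() and (nnz.sum(axis=1) <= 2).all()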
def get_mask_2d_greedy(mat, n, m):
r"""
Greedily generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`.
Greedily generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`.
This function would pad each dimension of :attr:`mat` by zero to be a multiple of :attr:`m` before mask generation.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column.
Greedy generation: for each :math:`m \times m` block, select values to keep in descending order.
......@@ -350,15 +364,17 @@ def get_mask_2d_greedy(mat, n, m):
sub_mask = np.squeeze(mask_padded[idx])
min_order_1d_indices = np.argsort(sub_mat)
min_order_2d_indices = [(int(x / m), x % m)
for x in min_order_1d_indices]
min_order_2d_indices = [
(int(x / m), x % m) for x in min_order_1d_indices
]
row_counter = collections.Counter()
col_counter = collections.Counter()
for i in range(len(min_order_1d_indices) - 1, -1, -1):
matrix_entry = min_order_2d_indices[i]
if (row_counter[matrix_entry[0]] == n) or \
(col_counter[matrix_entry[1]] == n):
if (row_counter[matrix_entry[0]] == n) or (
col_counter[matrix_entry[1]] == n
):
continue
sub_mask[matrix_entry[0], matrix_entry[1]] = 1.0
......@@ -373,7 +389,7 @@ def get_mask_2d_greedy(mat, n, m):
col_end = col_start + m
mask[row_start:row_end, col_start:col_end] = mask_padded[curr_idx]
curr_idx += 1
return mask[:mat.shape[0], :mat.shape[1]]
return mask[: mat.shape[0], : mat.shape[1]]
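The greedy pass walks entries from largest to smallest magnitude and keeps one only while its row and column each hold fewer than n kept entries; the counter logic for a single m x m block in isolation (a sketch):

import collections
import numpy as np

m, n = 4, 2
sub_mat = np.abs(np.random.rand(m, m))
sub_mask = np.zeros((m, m))
row_counter, col_counter = collections.Counter(), collections.Counter()
for flat_idx in np.argsort(sub_mat, axis=None)[::-1]:  # largest first
    r, c = divmod(int(flat_idx), m)
    if row_counter[r] == n or col_counter[c] == n:
        continue  # this row or column already kept n entries
    sub_mask[r, c] = 1.0
    row_counter[r] += 1
    col_counter[c] += 1
assert (sub_mask.sum(axis=0) <= n).all() and (sub_mask.sum(axis=1) <= n).all()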
_valid_2d_patterns_lock = threading.Lock()
......@@ -384,7 +400,7 @@ def _compute_valid_2d_patterns(n, m):
r"""
Compute all valid 2D `n:m` sparse patterns.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column.
Args:
......@@ -406,8 +422,11 @@ def _compute_valid_2d_patterns(n, m):
patterns = patterns + patterns
patterns = np.asarray(list(set(permutations(patterns, m))))
valid = ((patterns.sum(axis=1) <= n).sum(
axis=1) == m).nonzero()[0].reshape(-1)
valid = (
((patterns.sum(axis=1) <= n).sum(axis=1) == m)
.nonzero()[0]
.reshape(-1)
)
valid_patterns = np.empty((valid.shape[0], m, m))
valid_patterns[:] = patterns[valid[:]]
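For n=2, m=4 this enumeration yields the 90 valid 4 x 4 patterns (rows drawn from the six 2:4 row patterns, column sums capped at n); a compact standalone replay of the same counting:

import numpy as np
from itertools import permutations

n, m = 2, 4
rows = list(set(permutations([0, 0, 1, 1])))  # the 6 distinct 2:4 rows
valid = [
    p
    for p in set(permutations(rows * 2, m))  # ordered choices of m rows
    if (np.asarray(p).sum(axis=0) <= n).all()  # column sums capped at n
]
print(len(valid))  # 90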
......@@ -420,11 +439,11 @@ def _compute_valid_2d_patterns(n, m):
def get_mask_2d_best(mat, n, m):
r"""
Generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`
to form sparse matrix with maximun L1 norm .This function would pad each
Generate 2D `n:m` sparse pattern mask of the input matrix :attr:`mat`
to form a sparse matrix with maximum L1 norm. This function would pad each
dimension of :attr:`mat` by zero to be a multiple of :attr:`m` before mask generation.
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
2D `n:m` sparse pattern: At least :math:`n \times n` zeros in every :math:`m \times m` block
under the constraint of at least :attr:`n` zeros for each row and column.
*Note*: L1 norm of sparse matrix from `Best` API is greater than or equal to the one from `Greedy`.
......@@ -454,9 +473,10 @@ def get_mask_2d_best(mat, n, m):
mat_flattern, shape = _reshape_2d(mat, m)
mask_flattern = np.ones_like(mat_flattern).reshape(-1, m, m)
pmax = np.argmax(np.matmul(mat_flattern,
patterns.reshape(patterns.shape[0], m * m).T),
axis=1)
pmax = np.argmax(
np.matmul(mat_flattern, patterns.reshape(patterns.shape[0], m * m).T),
axis=1,
)
mask_flattern[:] = patterns[pmax[:]]
mask = np.empty(shape)
......@@ -468,7 +488,7 @@ def get_mask_2d_best(mat, n, m):
col_end = col_start + m
mask[row_start:row_end, col_start:col_end] = mask_flattern[curr_idx]
curr_idx += 1
return mask[:mat.shape[0], :mat.shape[1]]
return mask[: mat.shape[0], : mat.shape[1]]
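Picking the pattern that maximizes the kept L1 norm reduces to one matmul plus an argmax per block, as the code above does; in isolation (with stand-in patterns, not the real valid-pattern bank):

import numpy as np

m = 4
blocks = np.abs(np.random.rand(2, m * m))  # 2 flattened m x m blocks
patterns = np.stack([np.eye(m), np.ones((m, m))])  # stand-in 0/1 patterns
scores = blocks @ patterns.reshape(len(patterns), m * m).T
best = np.argmax(scores, axis=1)  # max-L1 pattern index per block
masks = patterns[best]  # shape (2, m, m)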
def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
......@@ -508,9 +528,10 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
dtype = tensor.dtype
t = tensor.astype(float)
assert isinstance(func_name, MaskAlgo), \
"func_name argumet of create_mask is only accepted as type MaskAlgo. " \
"But got {}".format(type(func_name))
assert isinstance(func_name, MaskAlgo), (
"func_name argumet of create_mask is only accepted as type MaskAlgo. "
"But got {}".format(type(func_name))
)
func = getattr(sys.modules[__name__], func_name.value, None)
if len(shape) == 1:
t = t.reshape(1, shape[0])
......@@ -520,14 +541,20 @@ def create_mask(tensor, func_name=MaskAlgo.MASK_1D, n=2, m=4):
t = t.reshape(shape[0] * shape[1], shape[2])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif len(shape) == 4:
t = t.transpose([0, 1, 3, 2]).reshape(shape[0] * shape[1] * shape[3],
shape[2])
t = t.transpose([0, 1, 3, 2]).reshape(
shape[0] * shape[1] * shape[3], shape[2]
)
mask = func(t, n=n, m=m)
return mask.reshape([shape[0], shape[1], shape[3],
shape[2]]).transpose([0, 1, 3, 2]).astype(dtype)
return (
mask.reshape([shape[0], shape[1], shape[3], shape[2]])
.transpose([0, 1, 3, 2])
.astype(dtype)
)
else:
raise ValueError("The dimension of input tensor is not supported in create_mask, " \
"Only dimension < 4 is supported but got {}".format(len(shape)))
raise ValueError(
"The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}".format(len(shape))
)
mask = func(t, n=n, m=m)
return mask.reshape(shape).astype(dtype)
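The 4-D branch views a conv weight of layout (h, w, in, out) as a 2-D matrix whose rows group h, w and out, so pruning runs along the input-channel axis; the reshape round-trips losslessly (numpy sketch with hypothetical sizes):

import numpy as np

h, w, cin, cout = 3, 3, 8, 16  # hypothetical conv weight shape
t = np.random.rand(h, w, cin, cout)
flat = t.transpose([0, 1, 3, 2]).reshape(h * w * cout, cin)
back = flat.reshape([h, w, cout, cin]).transpose([0, 1, 3, 2])
assert np.array_equal(back, t)  # lossless round trip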
......@@ -566,9 +593,10 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
shape = tensor.shape
t = tensor.astype(float)
assert type(func_name) == CheckMethod, \
"func_name argumet of check_sparsity is only accepted as type CheckMethod. " \
"But got {}".format(type(func_name))
assert type(func_name) == CheckMethod, (
"func_name argumet of check_sparsity is only accepted as type CheckMethod. "
"But got {}".format(type(func_name))
)
func = getattr(sys.modules[__name__], func_name.value, None)
if len(shape) == 1:
t = t.reshape(1, shape[0])
......@@ -578,10 +606,13 @@ def check_sparsity(tensor, func_name=CheckMethod.CHECK_1D, n=2, m=4):
t = t.reshape(shape[0] * shape[1], shape[2])
# 4d-tensor conv (h, w, in, out) -> (h*w*out, in) in GemmConvKernel Op
elif len(shape) == 4:
t = t.transpose([0, 1, 3,
2]).reshape([shape[0] * shape[1] * shape[3], shape[2]])
t = t.transpose([0, 1, 3, 2]).reshape(
[shape[0] * shape[1] * shape[3], shape[2]]
)
else:
raise ValueError("The dimension of input tensor is not supported in create_mask, " \
"Only dimension < 4 is supported but got {}".format(len(shape)))
raise ValueError(
"The dimension of input tensor is not supported in create_mask, "
"Only dimension < 4 is supported but got {}".format(len(shape))
)
return func(t, n=n, m=m)
......@@ -1352,12 +1352,13 @@ class ParameterMetaClass(VariableMetaClass):
@six.add_metaclass(VariableMetaClass)
class Variable(object):
"""
**Notes**:
**The constructor of Variable should not be invoked directly.**
**In Static Graph Mode: Please use** `Block.create_var` **to create a Static variable which has no data until being feed.**
Notes:
The constructor of Variable should not be invoked directly.
In Static Graph Mode: Please use `Block.create_var` to create a static variable which has no data until being fed.
**In Dygraph Mode: Please use** :ref:`api_fluid_dygraph_to_variable` **to create a dygraph variable with real data**
In Dygraph Mode: Please use :ref:`api_fluid_dygraph_to_variable` to create a dygraph variable with real data.
In Fluid, every input and output of an OP is a variable. In most
cases, variables are used for holding different kinds of data or training
......@@ -1514,12 +1515,13 @@ class Variable(object):
def detach(self):
"""
Returns a new Variable, detached from the current graph.
It will share data with origin Variable and without tensor copy.
In addition, the detached Variable doesn't provide gradient propagation.
Returns:
( :ref:`api_guide_Variable_en` | dtype is same as current Variable): The detached Variable.
( :ref:`api_guide_Variable_en` | dtype is same as current Variable), The detached Variable.
Examples:
.. code-block:: python
......@@ -1533,6 +1535,7 @@ class Variable(object):
# create a detached Variable
y = x.detach()
"""
assert (
......@@ -2085,6 +2088,7 @@ class Variable(object):
@property
def T(self):
"""
Permute current Variable with its dimensions reversed.
If `n` is the number of dimensions of `x`, `x.T` is equivalent to `x.transpose([n-1, n-2, ..., 0])`.
......@@ -2103,6 +2107,7 @@ class Variable(object):
x_T_np = exe.run(paddle.static.default_main_program(), fetch_list=[x_T])[0]
print(x_T_np.shape)
# (5, 3, 2)
"""
if len(self.shape) == 1:
return self
......@@ -2141,7 +2146,7 @@ class Variable(object):
as ``out = assign(tensor)`` .
Returns:
Variable: The cloned Variable.
Variable, The cloned Variable.
Examples:
.. code-block:: python
......@@ -2171,6 +2176,7 @@ class Variable(object):
def _set_error_clip(self, error_clip):
"""
Set the error_clip.
Args:
......@@ -2178,11 +2184,13 @@ class Variable(object):
Returns:
None
"""
self.error_clip = error_clip
def _set_info(self, key, value):
"""
Set key-value information for this variable.
Args:
......@@ -2191,6 +2199,7 @@ class Variable(object):
Returns:
None
"""
if not hasattr(self, "_info"):
self._info = {}
......@@ -2198,6 +2207,7 @@ class Variable(object):
def _get_info(self, key):
"""
Get the information of this variable corresponding to key.
Args:
......@@ -2205,6 +2215,7 @@ class Variable(object):
Returns:
object
"""
if hasattr(self, "_info") and key in self._info:
return self._info[key]
......@@ -2212,7 +2223,9 @@ class Variable(object):
def _slice_indices(self, slice, length):
"""
Reference implementation for the slice.indices method.
"""
# Compute step and length as integers.
step = 1 if slice.step is None else slice.step
......@@ -2383,7 +2396,7 @@ class Variable(object):
Default: None
Returns:
Tensor: the value in given scope.
Tensor, the value in given scope.
Examples:
.. code-block:: python
......@@ -2438,6 +2451,7 @@ class Variable(object):
def set_value(self, value, scope=None):
'''
Set the value to the tensor in given scope.
Args:
......@@ -2477,6 +2491,7 @@ class Variable(object):
if var.persistable:
t_load = paddle.load(path+var.name+'.pdtensor')
var.set_value(t_load)
'''
# The 'framework' is a low-level module, and 'executor'
......@@ -2547,10 +2562,11 @@ class Variable(object):
def size(self):
"""
Returns the number of elements for the current Variable, which is an int64 Variable with shape [1].
Returns:
Variable: the number of elements for current Variable
Variable, the number of elements for current Variable
Examples:
.. code-block:: python
......@@ -2564,6 +2580,7 @@ class Variable(object):
# get the number of elements of the Variable
y = x.size()
"""
output = self.block.create_var(
......@@ -2578,23 +2595,27 @@ class Variable(object):
def _set_attr(self, name, val):
"""
Set the value of attribute by attribute's name.
Args:
name(str): the attribute name.
val(int|str|list): the value of the attribute.
"""
self._update_desc_attr(name, val)
def _has_attr(self, name):
"""
Whether this Variable has the attribute with the name `name` or not.
Args:
name(str): the attribute name.
Returns:
bool: True if has this attribute.
bool, True if has this attribute.
"""
return self.desc.has_attr(name)
......@@ -2624,7 +2645,7 @@ class Variable(object):
name(str): the attribute name.
Returns:
int|str|list: The attribute value. The return value
int|str|list, The attribute value. The return value
can be any valid attribute type.
"""
return self.desc.attr(name)
......@@ -3196,14 +3217,16 @@ class Operator(object):
def input(self, name):
r"""
Get the input arguments according to the input parameter name.
Args:
name(str): The input parameter name.
Returns:
list: return the list of argument names that associated with \
list, return the list of argument names that are associated with \
the specific parameter name.
"""
return self.desc.input(name)
......
......@@ -20,7 +20,13 @@ from __future__ import print_function
import warnings
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import Variable, _non_static_mode, _varbase_creator, _in_legacy_dygraph, in_dygraph_mode
from ..framework import (
Variable,
_non_static_mode,
_varbase_creator,
_in_legacy_dygraph,
in_dygraph_mode,
)
from .. import core
from ..param_attr import ParamAttr
from . import nn
......@@ -33,22 +39,29 @@ __all__ = ['accuracy', 'auc']
def accuracy(input, label, k=1, correct=None, total=None):
"""
accuracy layer.
Refer to https://en.wikipedia.org/wiki/Precision_and_recall
This function computes the accuracy using the input and label.
If the correct label occurs in the top k predictions, then `correct` will be incremented by one.
Note: the dtype of accuracy is determined by input. the input and label dtype can be different.
Note:
the dtype of accuracy is determined by input. the input and label dtype can be different.
Args:
input(Tensor): The input of accuracy layer, which is the predictions of network. A Tensor with type float32, float64.
The shape is ``[sample_number, class_dim]``.
label(Tensor): The label of dataset. Tensor with type int32, int64. The shape is ``[sample_number, 1]``.
k(int): The top k predictions for each class will be checked. Data type is int64 or int32.
correct(Tensor): The correct predictions count. A Tensor with type int64 or int32.
total(Tensor): The total entries count. A tensor with type int64 or int32.
k(int, optional): The top k predictions for each class will be checked. Data type is int64 or int32. Default is 1.
correct(Tensor, optional): The correct predictions count. A Tensor with type int64 or int32. Default is None.
total(Tensor, optional): The total entries count. A tensor with type int64 or int32. Default is None.
Returns:
Tensor: The correct rate. A Tensor with type float32.
Tensor, The correct rate. A Tensor with type float32.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.static as static
......@@ -68,6 +81,7 @@ def accuracy(input, label, k=1, correct=None, total=None):
fetch_list=[result[0]])
print(output)
#[array([0.], dtype=float32)]
"""
if _non_static_mode():
if correct is None:
......@@ -76,15 +90,18 @@ def accuracy(input, label, k=1, correct=None, total=None):
total = _varbase_creator(dtype="int32")
_k = k.numpy().item(0) if isinstance(k, Variable) else k
topk_out, topk_indices = _legacy_C_ops.top_k_v2(input, 'k', _k,
'sorted', False)
_acc, _, _ = _legacy_C_ops.accuracy(topk_out, topk_indices, label,
correct, total)
topk_out, topk_indices = _legacy_C_ops.top_k_v2(
input, 'k', _k, 'sorted', False
)
_acc, _, _ = _legacy_C_ops.accuracy(
topk_out, topk_indices, label, correct, total
)
return _acc
helper = LayerHelper("accuracy", **locals())
check_variable_and_dtype(input, 'input', ['float16', 'float32', 'float64'],
'accuracy')
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'accuracy'
)
topk_out = helper.create_variable_for_type_inference(dtype=input.dtype)
topk_indices = helper.create_variable_for_type_inference(dtype="int64")
inputs = {"X": [input]}
......@@ -93,39 +110,38 @@ def accuracy(input, label, k=1, correct=None, total=None):
else:
attrs = {'k': k}
attrs['sorted'] = False
helper.append_op(type="top_k_v2",
inputs=inputs,
attrs=attrs,
outputs={
"Out": [topk_out],
"Indices": [topk_indices]
})
helper.append_op(
type="top_k_v2",
inputs=inputs,
attrs=attrs,
outputs={"Out": [topk_out], "Indices": [topk_indices]},
)
acc_out = helper.create_variable_for_type_inference(dtype="float32")
if correct is None:
correct = helper.create_variable_for_type_inference(dtype="int32")
if total is None:
total = helper.create_variable_for_type_inference(dtype="int32")
helper.append_op(type="accuracy",
inputs={
"Out": [topk_out],
"Indices": [topk_indices],
"Label": [label]
},
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
})
helper.append_op(
type="accuracy",
inputs={"Out": [topk_out], "Indices": [topk_indices], "Label": [label]},
outputs={
"Accuracy": [acc_out],
"Correct": [correct],
"Total": [total],
},
)
return acc_out
def auc(input,
label,
curve='ROC',
num_thresholds=2**12 - 1,
topk=1,
slide_steps=1,
ins_tag_weight=None):
def auc(
input,
label,
curve='ROC',
num_thresholds=2**12 - 1,
topk=1,
slide_steps=1,
ins_tag_weight=None,
):
"""
**Area Under the Curve (AUC) Layer**
......@@ -216,13 +232,14 @@ def auc(input,
helper = LayerHelper("auc", **locals())
if ins_tag_weight is None:
ins_tag_weight = tensor.fill_constant(shape=[1, 1],
dtype="float32",
value=1.0)
ins_tag_weight = tensor.fill_constant(
shape=[1, 1], dtype="float32", value=1.0
)
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'auc')
check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'auc')
check_variable_and_dtype(ins_tag_weight, 'ins_tag_weight',
['float32', 'float64'], 'auc')
check_variable_and_dtype(
ins_tag_weight, 'ins_tag_weight', ['float32', 'float64'], 'auc'
)
auc_out = helper.create_variable_for_type_inference(dtype="float64")
batch_auc_out = helper.create_variable_for_type_inference(dtype="float64")
# make tp, tn, fp, fn persistable, so that can accumulate all batches.
......@@ -236,62 +253,71 @@ def auc(input,
batch_stat_pos = helper.create_global_variable(
persistable=True,
dtype='int64',
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1])
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
)
batch_stat_neg = helper.create_global_variable(
persistable=True,
dtype='int64',
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1])
shape=[(1 + slide_steps) * (num_thresholds + 1) + 1],
)
# for global auc
# Needn't maintain the batch id
stat_pos = helper.create_global_variable(persistable=True,
dtype='int64',
shape=[1, num_thresholds + 1])
stat_neg = helper.create_global_variable(persistable=True,
dtype='int64',
shape=[1, num_thresholds + 1])
stat_pos = helper.create_global_variable(
persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
)
stat_neg = helper.create_global_variable(
persistable=True, dtype='int64', shape=[1, num_thresholds + 1]
)
for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
helper.set_variable_initializer(var, Constant(value=0.0,
force_cpu=False))
helper.set_variable_initializer(
var, Constant(value=0.0, force_cpu=False)
)
#"InsTagWeight": [ins_tag_weight]
# "InsTagWeight": [ins_tag_weight]
# Batch AUC
helper.append_op(type="auc",
inputs={
"Predict": [input],
"Label": [label],
"StatPos": [batch_stat_pos],
"StatNeg": [batch_stat_neg]
},
attrs={
"curve": curve,
"num_thresholds": num_thresholds,
"slide_steps": slide_steps
},
outputs={
"AUC": [batch_auc_out],
"StatPosOut": [batch_stat_pos],
"StatNegOut": [batch_stat_neg]
})
helper.append_op(
type="auc",
inputs={
"Predict": [input],
"Label": [label],
"StatPos": [batch_stat_pos],
"StatNeg": [batch_stat_neg],
},
attrs={
"curve": curve,
"num_thresholds": num_thresholds,
"slide_steps": slide_steps,
},
outputs={
"AUC": [batch_auc_out],
"StatPosOut": [batch_stat_pos],
"StatNegOut": [batch_stat_neg],
},
)
# Global AUC
helper.append_op(type="auc",
inputs={
"Predict": [input],
"Label": [label],
"StatPos": [stat_pos],
"StatNeg": [stat_neg]
},
attrs={
"curve": curve,
"num_thresholds": num_thresholds,
"slide_steps": 0
},
outputs={
"AUC": [auc_out],
"StatPosOut": [stat_pos],
"StatNegOut": [stat_neg]
})
return auc_out, batch_auc_out, [
batch_stat_pos, batch_stat_neg, stat_pos, stat_neg
]
helper.append_op(
type="auc",
inputs={
"Predict": [input],
"Label": [label],
"StatPos": [stat_pos],
"StatNeg": [stat_neg],
},
attrs={
"curve": curve,
"num_thresholds": num_thresholds,
"slide_steps": 0,
},
outputs={
"AUC": [auc_out],
"StatPosOut": [stat_pos],
"StatNegOut": [stat_neg],
},
)
return (
auc_out,
batch_auc_out,
[batch_stat_pos, batch_stat_neg, stat_pos, stat_neg],
)
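Since the example section of this docstring is collapsed above, a minimal static-graph usage sketch (hypothetical shapes; only the `auc` call itself is the API defined here):

import paddle
import paddle.static as static

paddle.enable_static()
data = static.data(name="input", shape=[-1, 32, 32], dtype="float32")
label = static.data(name="label", shape=[-1, 1], dtype="int64")
fc_out = static.nn.fc(x=data, size=2)
predict = paddle.nn.functional.softmax(fc_out)
# Returns the global AUC, the sliding-window batch AUC and the stat variables.
auc_out, batch_auc_out, state = paddle.fluid.layers.auc(
    input=predict, label=label
)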
This diff is collapsed.
......@@ -241,13 +241,13 @@ def send_ue_recv(
src_index (Tensor): An 1-D tensor, and the available data type is int32, int64.
dst_index (Tensor): An 1-D tensor, and should have the same shape as `src_index`.
The available data type is int32, int64.
message_op (str): Different message ops for x and e, including `add`, `sub`, `mul`, `div`.
reduce_op (str): Different reduce ops, including `sum`, `mean`, `max`, `min`.
message_op (str, optional): Different message ops for x and e, including `add`, `sub`, `mul`, `div`.
reduce_op (str, optional): Different reduce ops, including `sum`, `mean`, `max`, `min`.
Default value is `sum`.
out_size (int|Tensor|None): We can set `out_size` to get necessary output shape. If not set or
out_size (int|Tensor, optional): We can set `out_size` to get necessary output shape. If not set or
out_size is smaller than or equal to 0, then this input will not be used.
Otherwise, `out_size` should be equal to or larger than
max(dst_index) + 1.
max(dst_index) + 1. Default value is `None`.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
......
This diff is collapsed.
......@@ -32,6 +32,7 @@ def sample_neighbors(
name=None,
):
"""
Graph Sample Neighbors API.
This API is mainly used in Graph Learning domain, and the main purpose is to
......@@ -52,16 +53,16 @@ def sample_neighbors(
The data type should be the same with `row`.
input_nodes (Tensor): The input nodes we need to sample neighbors for, and the
data type should be the same with `row`.
sample_size (int): The number of neighbors we need to sample. Default value is -1,
sample_size (int, optional): The number of neighbors we need to sample. Default value is -1,
which means returning all the neighbors of the input nodes.
eids (Tensor): The eid information of the input graph. If return_eids is True,
eids (Tensor, optional): The eid information of the input graph. If return_eids is True,
then `eids` should not be None. The data type should be the
same with `row`. Default is None.
return_eids (bool): Whether to return eid information of sample edges. Default is False.
perm_buffer (Tensor): Permutation buffer for fisher-yates sampling. If `use_perm_buffer`
return_eids (bool, optional): Whether to return eid information of sample edges. Default is False.
perm_buffer (Tensor, optional): Permutation buffer for fisher-yates sampling. If `use_perm_buffer`
is True, then `perm_buffer` should not be None. The data type should
be the same with `row`. If not None, we will use fisher-yates sampling
to speed up. Only useful for gpu version.
to speed up. Only useful for gpu version. Default is None.
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
......@@ -69,15 +70,16 @@ def sample_neighbors(
- out_neighbors (Tensor), the sample neighbors of the input nodes.
- out_count (Tensor), the number of sampling neighbors of each input node, and the shape
should be the same with `input_nodes`.
should be the same with `input_nodes`.
- out_eids (Tensor), if `return_eids` is True, we will return the eid information of the
sample edges.
sample edges.
Examples:
.. code-block:: python
import paddle
# edges: (3, 0), (7, 0), (0, 1), (9, 1), (1, 2), (4, 3), (2, 4),
# (9, 5), (3, 5), (9, 6), (1, 6), (9, 8), (7, 8)
row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7]
......
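The truncated example above can be completed along these lines (a hedged sketch assuming the `paddle.geometric` namespace this API ships in; the colptr values are the CSC pointers implied by the edge list):

import paddle

# edges: (3, 0), (7, 0), (0, 1), (9, 1), (1, 2), (4, 3), (2, 4),
# (9, 5), (3, 5), (9, 6), (1, 6), (9, 8), (7, 8)
row = paddle.to_tensor([3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7], dtype="int64")
# CSC column pointers for the 10-node graph above.
colptr = paddle.to_tensor([0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13], dtype="int64")
nodes = paddle.to_tensor([0, 8, 1, 2], dtype="int64")
out_neighbors, out_count = paddle.geometric.sample_neighbors(
    row, colptr, nodes, sample_size=2
)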
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -1180,7 +1180,8 @@ def triu(x, diagonal=0, name=None):
def meshgrid(*args, **kwargs):
"""
Takes a list of N tensors as input *args, each of which is 1-dimensional vector, and creates N-dimensional grids.
Takes a list of N tensors as input :attr:`*args`, each of which is a 1-dimensional vector, and creates N-dimensional grids.
Args:
*args(Tensor|list of Tensor): the shapes of the input k tensors are (N1,),
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.