Unverified commit d2ef888b, authored by wanghuancoder, committed by GitHub

[Eager] some python c api use final state (#45221)

some python c api use final state
Parent: 016b94c2
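
The recurring change in this commit: Python APIs that previously branched only on _non_static_mode() now check in_dygraph_mode() first and call the eager "final state" bindings (_C_ops.final_state_*, positional arguments), keeping _in_legacy_dygraph() as the fallback for the old attr-string calls. A minimal sketch of that dispatch, modeled on the reduce_sum hunk below (the import path for the mode checks is an assumption):

    from paddle import _C_ops
    from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph

    def _sum_dispatch(x, dim=None, keep_dim=False):
        # dim=None or [] means "reduce over every axis", as in the reduce_sum change below.
        reduce_all = dim is None or dim == [] or len(dim) == len(x.shape)
        if in_dygraph_mode():
            # Eager final-state op: positional arguments (x, dims, out_dtype, keep_dim).
            return _C_ops.final_state_sum(x, [] if reduce_all else dim, None, keep_dim)
        elif _in_legacy_dygraph():
            # Legacy dygraph op: attributes passed as alternating name/value strings.
            return _C_ops.reduce_sum(x, 'dim', dim if dim else [0], 'keep_dim', keep_dim,
                                     'reduce_all', reduce_all)
        raise NotImplementedError("static-graph branch omitted from this sketch")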
......@@ -432,7 +432,7 @@ std::vector<int64_t> CastPyArg2Longs(PyObject* obj,
i));
}
}
} else {
} else if ((PyObject*)obj != Py_None) { // NOLINT
PADDLE_THROW(platform::errors::InvalidArgument(
"%s(): argument (position %d) must be "
"list or tuple, but got %s",
......
......@@ -40,13 +40,14 @@
func : acosh
backward : acosh_grad
- api : adadelta
- api : adadelta_
args : (Tensor param, Tensor grad, Tensor avg_squared_grad, Tensor avg_squared_update, float rho, float epsilon)
output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out)
infer_meta :
func : AdadeltaInferMeta
kernel :
func : adadelta
inplace : (param -> param_out), (avg_squared_grad -> moment_out), (avg_squared_update -> inf_norm_out)
- api : adagrad_
args : (Tensor param, Tensor grad, Tensor moment, Tensor learning_rate, float epsilon)
......@@ -71,13 +72,14 @@
optional : master_param, skip_update
inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_outs)
- api : adamax
- api : adamax_
args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, float beta1, float beta2, float epsilon)
output : Tensor(param_out), Tensor(avg_squared_grad_out), Tensor(avg_squared_update_out)
infer_meta :
func : AdamaxInferMeta
kernel :
func : adamax
inplace : (param -> param_out), (moment -> avg_squared_grad_out), (inf_norm -> avg_squared_update_out)
- api : adamw_
args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, float lr_ratio, float coeff, bool with_decay, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow)
......@@ -540,7 +542,7 @@
backward : conj_grad
- api : conv2d
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
args : (Tensor input, Tensor filter, int[] strides, int[] paddings, str padding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search)
output : Tensor
infer_meta :
func : ConvInferMeta
......@@ -1919,7 +1921,6 @@
func : NormInferMeta
kernel :
func : norm
intermediate : norm
backward : norm_grad
- api : not_equal
......
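
Each "- api :" entry above generates an eager Python binding whose positional arguments follow its args list; a trailing underscore together with an inplace mapping marks the variant that writes results back into its inputs. A hedged illustration using the new adadelta_ entry (values are arbitrary; this assumes a build of Paddle that ships these bindings):

    import paddle
    from paddle import _C_ops

    param = paddle.ones([4])
    grad = paddle.full([4], 0.1)
    avg_sq_grad = paddle.zeros([4])      # accumulator for the squared gradients
    avg_sq_update = paddle.zeros([4])    # accumulator for the squared updates
    # Arguments follow the YAML args list: (param, grad, avg_squared_grad,
    # avg_squared_update, rho, epsilon); param and both accumulators are updated in place.
    _C_ops.final_state_adadelta_(param, grad, avg_sq_grad, avg_sq_update, 0.95, 1.0e-6)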
......@@ -22,6 +22,7 @@ PD_REGISTER_KERNEL(size,
CPU,
ALL_LAYOUT,
phi::SizeKernel,
uint8_t,
int16_t,
int,
int64_t,
......
......@@ -205,6 +205,7 @@ OP_NAMEMAPPING = {
'elementwise_sub': 'final_state_subtract',
'elementwise_mul': 'final_state_multiply',
'elementwise_div': 'final_state_divide',
'elementwise_mod': 'final_state_modulo',
}
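
The new elementwise_mod entry extends the legacy-name to eager-name table. A hedged sketch of the lookup pattern such a table supports (the helper name here is hypothetical; only the getattr dispatch mirrors code appearing later in this commit):

    from paddle import _C_ops

    # Excerpt of the table above: legacy op name -> eager (final-state) binding name.
    OP_NAMEMAPPING = {
        'elementwise_sub': 'final_state_subtract',
        'elementwise_mod': 'final_state_modulo',
    }

    def _eager_elementwise(op_name, x, y):
        # Hypothetical helper: resolve and call the eager binding for a legacy op name.
        return getattr(_C_ops, OP_NAMEMAPPING.get(op_name, op_name))(x, y)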
......@@ -4656,7 +4657,15 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
if dim is not None and not isinstance(dim, list):
dim = [dim]
if _non_static_mode():
if in_dygraph_mode():
reduce_all = True if dim == None or dim == [] or len(dim) == len(
input.shape) else False
dim = dim if dim != None and dim != [] else [0]
if reduce_all:
return _C_ops.final_state_sum(input, [], None, keep_dim)
else:
return _C_ops.final_state_sum(input, dim, None, keep_dim)
elif _in_legacy_dygraph():
reduce_all = True if dim == None or dim == [] or len(dim) == len(
input.shape) else False
dim = dim if dim != None and dim != [] else [0]
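
A usage sketch of the new eager branch, assuming the fluid.layers entry point and default eager mode:

    import paddle
    from paddle.fluid import layers

    x = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]])
    layers.reduce_sum(x)           # dim=None -> final_state_sum(x, [], None, False) -> 10.
    layers.reduce_sum(x, dim=[1])  # -> final_state_sum(x, [1], None, False) -> [3., 7.]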
......@@ -5003,11 +5012,11 @@ def reduce_all(input, dim=None, keep_dim=False, name=None):
# keep_dim=True, x.shape=(2,2), out.shape=(2,1)
"""
if dim is not None and not isinstance(dim, list):
dim = [dim]
check_variable_and_dtype(input, 'input', ('bool'), 'reduce_all')
helper = LayerHelper('reduce_all', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(type='reduce_all',
inputs={'X': input},
outputs={'Out': out},
......@@ -6446,7 +6455,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
elif isinstance(shape, tmp_tensor_type):
# TODO: Tensor shape in final_state_reshape has not been tested
shape.stop_gradient = True
out, _ = _C_ops.reshape2(x, shape)
out = _C_ops.final_state_reshape(x, shape)
else:
raise ValueError(
"shape must be an instance of `list`, `tuple` or `Variable`,"
......@@ -15742,7 +15751,12 @@ def uniform_random(shape,
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
if _non_static_mode():
if in_dygraph_mode():
shape = utils.convert_shape_to_list(shape)
return _C_ops.final_state_uniform_random(shape, dtype, float(min),
float(max), seed,
_current_expected_place())
elif _in_legacy_dygraph():
shape = utils.convert_shape_to_list(shape)
return _C_ops.uniform_random('shape', shape, 'min', float(min), 'max',
float(max), 'seed', seed, 'dtype', dtype)
......
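
A hedged usage sketch for the uniform_random change (fluid.layers entry point assumed):

    import paddle
    from paddle.fluid import layers

    # In eager mode this now routes through
    # final_state_uniform_random(shape, dtype, min, max, seed, place).
    x = layers.uniform_random([2, 3], min=-1.0, max=1.0)   # float32 samples in [-1, 1)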
......@@ -2811,7 +2811,13 @@ class AdamaxOptimizer(Optimizer):
param_and_grad[0])
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param_and_grad[0])
if framework._non_static_mode():
if framework.in_dygraph_mode():
_C_ops.final_state_adamax_(param_and_grad[0], param_and_grad[1],
self._create_param_lr(param_and_grad),
moment, inf_norm, beta1_pow_acc,
self._beta1, self._beta2, self._epsilon)
elif framework._in_legacy_dygraph():
_C_ops.adamax(param_and_grad[0], param_and_grad[1],
self._create_param_lr(param_and_grad), moment,
inf_norm, beta1_pow_acc, param_and_grad[0], moment,
......@@ -3191,7 +3197,12 @@ class AdadeltaOptimizer(Optimizer):
avg_squared_update_acc = self._get_accumulator(
self._avg_squared_update_acc_str, param_and_grad[0])
if framework._non_static_mode():
if framework.in_dygraph_mode():
_C_ops.final_state_adadelta_(param_and_grad[0], param_and_grad[1],
avg_squared_grad_acc,
avg_squared_update_acc, self._rho,
self._epsilon)
elif framework._in_legacy_dygraph():
_C_ops.adadelta(param_and_grad[0], param_and_grad[1],
avg_squared_grad_acc, avg_squared_update_acc,
param_and_grad[0], avg_squared_grad_acc,
......
......@@ -1483,6 +1483,11 @@ def pad(x, pad, mode='constant', value=0, data_format="NCHW", name=None):
pad, (list, tuple)) and len(pad) == x_dim * 2:
paddings = pad
pad_value = value
if in_dygraph_mode():
out = _C_ops.final_state_pad(x, paddings, float(pad_value))
return out
check_variable_and_dtype(x, 'x', [
'float16', 'float32', 'float64', 'int32', 'int64', 'complex64',
'complex128'
......
......@@ -421,7 +421,15 @@ def conv1d(x,
squeeze_aixs = -3 if channel_last else -2
x = unsqueeze(x, axis=[squeeze_aixs])
if in_dynamic_mode():
if in_dygraph_mode():
l_type = "final_state_" + l_type
out = getattr(_C_ops,
l_type)(x, weight, stride, padding, padding_algorithm,
groups, dilation, conv2d_data_format, False, -1,
False, False, use_cudnn)
if bias is not None:
out = nn.elementwise_add(out, bias, axis=channel_dim)
elif _in_legacy_dygraph():
attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
......@@ -892,7 +900,15 @@ def conv1d_transpose(x,
x = unsqueeze(x, axis=[squeeze_axis])
weight = unsqueeze(weight, axis=[-1])
if in_dynamic_mode():
if in_dygraph_mode():
op_type = "final_state_" + op_type
out = getattr(_C_ops,
op_type)(x, weight, stride, padding, output_padding,
output_size, padding_algorithm, groups, dilation,
conv2d_data_format)
if bias is not None:
out = nn.elementwise_add(out, bias, axis=channel_dim)
elif _in_legacy_dygraph():
attrs = ('output_padding', output_padding, 'output_size', output_size,
'strides', stride, 'paddings', padding, 'padding_algorithm',
padding_algorithm, 'dilations', dilation, 'groups', groups,
......
......@@ -411,7 +411,11 @@ def square_error_cost(input, label):
# [0.01, 0.01]
"""
if _non_static_mode():
if in_dygraph_mode():
minus_out = _C_ops.final_state_subtract(input, label)
square_out = _C_ops.final_state_square(minus_out)
return square_out
elif _in_legacy_dygraph():
minus_out = _C_ops.elementwise_sub(input, label)
square_out = _C_ops.square(minus_out)
return square_out
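
A usage sketch matching the docstring values above; in eager mode this now composes final_state_subtract and final_state_square:

    import paddle
    import paddle.nn.functional as F

    input = paddle.to_tensor([1.1, 1.9])
    label = paddle.to_tensor([1.0, 2.0])
    F.square_error_cost(input, label)   # -> [0.01, 0.01]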
......@@ -634,8 +638,8 @@ def binary_cross_entropy(input,
out = _C_ops.final_state_multiply(out, weight, 'axis', -1)
if reduction == 'sum':
return _C_ops.reduce_sum(out, 'dim', [0], 'keep_dim', False,
"reduce_all", True)
return _C_ops.final_state_sum(out, [], None, False)
elif reduction == 'mean':
return _C_ops.final_state_mean_all(out)
else:
......@@ -773,19 +777,32 @@ def binary_cross_entropy_with_logits(logit,
"should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
% reduction)
if _non_static_mode():
if in_dygraph_mode():
one = _C_ops.final_state_full([1], float(1.0),
core.VarDesc.VarType.FP32,
_current_expected_place())
out = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
logit, label, False, -100)
if in_dygraph_mode():
one = _C_ops.final_state_full([1], float(1.0),
core.VarDesc.VarType.FP32,
_current_expected_place())
out = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
logit, label, False, -100)
if pos_weight is not None:
log_weight = _C_ops.final_state_add(
_C_ops.final_state_multiply(
label, _C_ops.final_state_subtract(pos_weight, one)), one)
out = _C_ops.final_state_multiply(out, log_weight)
if weight is not None:
out = _C_ops.final_state_multiply(out, weight)
if reduction == "sum":
return _C_ops.final_state_sum(out, [], None, False)
elif reduction == "mean":
return _C_ops.final_state_mean_all(out)
else:
one = _varbase_creator(dtype=logit.dtype)
_C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
'dtype', one.dtype, 'str_value', '1.0',
'shape', [1])
out = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
return out
elif _in_legacy_dygraph():
one = _varbase_creator(dtype=logit.dtype)
_C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
'dtype', one.dtype, 'str_value', '1.0', 'shape',
[1])
out = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
if pos_weight is not None:
log_weight = _C_ops.elementwise_add(
_C_ops.elementwise_mul(label,
......@@ -927,7 +944,7 @@ def hsigmoid_loss(input,
input, weight, label, path_table, path_code, bias, num_classes,
is_sparse, 0, [], [], [], is_sparse)
return out
if _non_static_mode():
elif _in_legacy_dygraph():
out, _, _ = _C_ops.hierarchical_sigmoid(input, weight, label,
path_table, path_code, bias,
'num_classes', num_classes,
......@@ -1126,10 +1143,10 @@ def margin_ranking_loss(input,
out = _C_ops.final_state_multiply(out, label)
if margin != 0.0:
margin = fluid.dygraph.base.to_variable([margin], dtype=out.dtype)
out = _C_ops.elementwise_add(out, margin)
out = _C_ops.relu(out)
out = _C_ops.final_state_add(out, margin)
out = _C_ops.final_state_relu(out)
if reduction == 'sum':
return _C_ops.reduce_sum(out, 'reduce_all', True)
return _C_ops.final_state_sum(out, [], None, False)
elif reduction == 'mean':
return _C_ops.final_state_mean_all(out)
return out
......@@ -1257,11 +1274,10 @@ def l1_loss(input, label, reduction='mean', name=None):
if reduction == 'mean':
return _C_ops.final_state_mean_all(unreduced)
elif reduction == 'sum':
return _C_ops.reduce_sum(unreduced, 'dim', [0], 'keep_dim', False,
'reduce_all', True)
return _C_ops.final_state_sum(unreduced, [], None, False)
else:
return unreduced
elif in_dynamic_mode():
elif _in_legacy_dygraph():
unreduced = _elementwise_op_in_dygraph(input,
label,
axis=-1,
......@@ -1360,15 +1376,15 @@ def nll_loss(input,
c = input_shape[1]
if in_dygraph_mode():
if input_dims != 2 and input_dims != 4:
input, _ = _C_ops.reshape2(input, None, 'shape', [n, c, 1, -1])
label, _ = _C_ops.reshape2(label, None, 'shape', [n, 1, -1])
input = _C_ops.final_state_reshape(input, [n, c, 1, -1])
label = _C_ops.final_state_reshape(label, [n, 1, -1])
out_shape = [n] + input_shape[2:]
out, total_weight = _C_ops.final_state_nll_loss(input, label, weight,
ignore_index, reduction)
if input_dims != 2 and input_dims != 4 and reduction == 'none':
out, _ = _C_ops.reshape2(out, None, 'shape', out_shape)
out = _C_ops.final_state_reshape(out, out_shape)
return out
if _in_legacy_dygraph():
elif _in_legacy_dygraph():
if input_dims != 2 and input_dims != 4:
input, _ = _C_ops.reshape2(input, None, 'shape', [n, c, 1, -1])
label, _ = _C_ops.reshape2(label, None, 'shape', [n, 1, -1])
......@@ -1495,11 +1511,19 @@ def kl_div(input, label, reduction='mean', name=None):
label.dtype) == 'float32':
label = paddle.cast(label, 'float64')
if _non_static_mode():
if _in_legacy_dygraph():
out = _C_ops.kldiv_loss(input, label, 'reduction', 'none')
else:
out = _C_ops.final_state_kldiv_loss(input, label, 'none')
if in_dygraph_mode():
out = _C_ops.final_state_kldiv_loss(input, label, 'none')
if reduction == 'mean':
out = paddle.mean(out)
elif reduction == 'sum':
out = paddle.sum(out)
elif reduction == 'batchmean':
if len(input.shape) > 0:
batch_size = input.shape[0]
out = paddle.sum(out) / batch_size
return out
elif _in_legacy_dygraph():
out = _C_ops.kldiv_loss(input, label, 'reduction', 'none')
if reduction == 'mean':
out = paddle.mean(out)
elif reduction == 'sum':
......@@ -1938,7 +1962,7 @@ def margin_cross_entropy(logits,
return loss
else:
return loss, softmax
elif paddle.in_dynamic_mode():
elif _in_legacy_dygraph():
softmax, loss = _C_ops.margin_cross_entropy(
logits, label, 'ring_id', ring_id, 'rank', rank, 'nranks', nranks,
'margin1', margin1, 'margin2', margin2, 'margin3', margin3, 'scale',
......@@ -2286,7 +2310,7 @@ def cross_entropy(input,
if input_dims - 1 == label_dims:
label = paddle.unsqueeze(label, axis=axis)
if _non_static_mode():
if in_dygraph_mode():
if soft_label == False:
valid_label = paddle.cast(label != ignore_index,
dtype=label.dtype) * label
......@@ -2310,15 +2334,130 @@ def cross_entropy(input,
ignore_index, 'numeric_stable_mode', True, 'axis', axis,
'use_softmax', use_softmax)
else:
if in_dygraph_mode():
_, out = _C_ops.final_state_cross_entropy_with_softmax(
input, label, soft_label, use_softmax, True, ignore_index,
axis)
if _in_legacy_dygraph():
_, out = _C_ops.softmax_with_cross_entropy(
_, out = _C_ops.final_state_cross_entropy_with_softmax(
input, label, soft_label, use_softmax, True, ignore_index, axis)
if weight is not None:
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
if soft_label == True:
# chajchaj:
# weight's shape is C, where C is class num.
# for 1d case: label's shape is [N,C], weight_gather's shape is N.
# for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
weight_gather = paddle.matmul(x=paddle.cast(
label, weight.dtype),
y=weight,
transpose_x=False,
transpose_y=True)
out_shape = list(out.shape)
weight_gather_reshape = reshape(weight_gather, shape=out_shape)
out = paddle.cast(out, weight_gather_reshape.dtype)
out = _C_ops.final_state_multiply(out, weight_gather_reshape)
else:
if input.shape[axis] != weight.shape[-1]:
raise ValueError(
"input's class_dimension({}) must equal to "
"weight's class_dimension({}) "
"when weight is provided" \
.format(input.shape[axis], weight.shape[-1]))
ignore_weight_mask = paddle.cast((label != ignore_index),
out.dtype)
if ignore_weight_mask.ndim > 1 and ignore_weight_mask.shape[
axis] == 1:
# TODO: Temporarily use squeeze instead of squeeze_
ignore_weight_mask = paddle.squeeze(ignore_weight_mask,
axis)
if axis != -1 and axis != valid_label.ndim - 1:
temp_perm = list(range(axis % valid_label.ndim)) \
+ list(range((axis % valid_label.ndim + 1), valid_label.ndim)) \
+ [axis % valid_label.ndim]
weight_gather = _C_ops.final_state_gather_nd(
weight, valid_label.transpose(temp_perm))
else:
weight_gather = _C_ops.final_state_gather_nd(
weight, valid_label)
weight_gather = _C_ops.final_state_multiply(
weight_gather, ignore_weight_mask)
input_shape = list(label.shape)
weight_gather_reshape = reshape(weight_gather,
shape=input_shape)
out = paddle.cast(out, weight_gather_reshape.dtype)
out = _C_ops.final_state_multiply(out, weight_gather_reshape)
if reduction == "sum":
# because of fluid_softmax_with_cross_entropy op's inner logic,
# in the out tensor of this op, the loss of sample with class_index==ignore_index is 0
# so, reduce_sum all directly is ok
return _C_ops.final_state_sum(out, [], None, False)
elif reduction == "mean":
# 1. if weight==none,
# numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
# denominator: count sample num with class_index!=ignore_index
# 2. else
# numerator: loss's weighted sum
# denominator: cal the sum of weight where the sample's class_index!=ignore_index
if ignore_index >= 0:
out_sum = _C_ops.final_state_sum(out, [], None, False)
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
mask = (label != ignore_index)
if weight is None:
mask = paddle.cast(mask, dtype=out_sum.dtype)
count = _C_ops.final_state_sum(mask, [], None, False)
ret = out_sum / (count + (count == 0.0))
else:
mask = paddle.cast(mask, weight_gather_reshape.dtype)
weight_ignored = _C_ops.final_state_multiply(
mask, weight_gather_reshape)
weight_sum = _C_ops.final_state_sum(weight_ignored, [],
None, False)
ret = out_sum / (weight_sum + (weight_sum == 0.0))
return ret
elif weight is not None:
out_sum = _C_ops.final_state_sum(out, [], None, False)
total_weight = _C_ops.final_state_sum(weight_gather_reshape, [],
None, False)
return out_sum / (total_weight + (total_weight == 0.0))
else:
return _C_ops.final_state_mean_all(out)
else:
if input_dims - 1 == label_dims:
out = paddle.squeeze(out, axis=axis)
return out
elif _in_legacy_dygraph():
if soft_label == False:
valid_label = paddle.cast(label != ignore_index,
dtype=label.dtype) * label
label_min = paddle.min(valid_label)
label_max = paddle.max(valid_label)
if label_min < 0:
raise ValueError("Target {} is out of lower bound.".format(
label_min.item()))
if label_max >= input.shape[axis]:
raise ValueError("Target {} is out of upper bound.".format(
label_max.item()))
if core.is_compiled_with_npu() or core.is_compiled_with_mlu():
if soft_label == False:
_, _, out = _C_ops.softmax_with_cross_entropy(
input, valid_label, 'soft_label', soft_label,
'ignore_index', ignore_index, 'numeric_stable_mode', True,
'axis', axis, 'use_softmax', use_softmax)
else:
_, _, out = _C_ops.softmax_with_cross_entropy(
input, label, 'soft_label', soft_label, 'ignore_index',
ignore_index, 'numeric_stable_mode', True, 'axis', axis,
'use_softmax', use_softmax)
else:
_, out = _C_ops.softmax_with_cross_entropy(
input, label, 'soft_label', soft_label, 'ignore_index',
ignore_index, 'numeric_stable_mode', True, 'axis', axis,
'use_softmax', use_softmax)
if weight is not None:
......@@ -2406,11 +2545,7 @@ def cross_entropy(input,
'reduce_all', True)
return out_sum / (total_weight + (total_weight == 0.0))
else:
if in_dygraph_mode():
return _C_ops.final_state_mean_all(out)
else:
return _C_ops.mean(out)
return _C_ops.mean(out)
else:
if input_dims - 1 == label_dims:
out = paddle.squeeze(out, axis=axis)
......@@ -2641,7 +2776,7 @@ def sigmoid_focal_loss(logit,
loss = _C_ops.final_state_multiply(alpha_t, loss)
gamma = fluid.dygraph.base.to_variable([gamma], dtype=loss.dtype)
gamma_t = _C_ops.final_state_pow(_C_ops.elementwise_sub(one, p_t),
gamma_t = _C_ops.final_state_pow(_C_ops.final_state_subtract(one, p_t),
gamma)
loss = _C_ops.final_state_multiply(gamma_t, loss)
......
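
For orientation, the composition above (the alpha weighting followed by the (1 - p_t)^gamma modulating factor) implements the standard sigmoid focal loss; in the usual notation, which is not spelled out in this diff:

    \mathrm{FL}(p_t) = -\,\alpha_t\,(1 - p_t)^{\gamma}\,\log(p_t)

where p_t is the predicted probability of the ground-truth class, alpha_t the class-balancing weight, and gamma the focusing parameter.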
......@@ -171,7 +171,7 @@ class Dirac(Initializer):
idx_list.append(offset)
if framework.in_dygraph_mode():
with fluid.dygraph.no_grad():
tmp_out, _ = _C_ops.reshape2(out_var, None, 'shape', [-1])
tmp_out = _C_ops.final_state_reshape(out_var, [-1])
tmp_out._share_underline_tensor_to(out_var)
else:
x_shape = block.create_var(name=unique_name.generate(".".join(
......@@ -239,15 +239,12 @@ class Dirac(Initializer):
tmp_out = _C_ops.final_state_scatter(out_var, index_tensor,
value_tensor, True)
tmp_out._share_underline_tensor_to(out_var)
tmp_reshape_out, _ = _C_ops.reshape2(out_var, None, 'shape',
origin_shape)
tmp_reshape_out = _C_ops.final_state_reshape(
out_var, origin_shape)
tmp_reshape_out._share_underline_tensor_to(out_var)
if var.dtype != VarDesc.VarType.FP32:
tmp_cast_out = _C_ops.cast(out_var, 'in_dtype',
out_var.dtype, 'out_dtype',
var.dtype)
tmp_cast_out = _C_ops.final_state_cast(out_var, var.dtype)
tmp_cast_out._share_underline_tensor_to(var)
else:
op = block.append_op(type="scatter",
inputs={
......
......@@ -17,6 +17,8 @@ from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid import framework
from ...tensor import diag, transpose, sign, qr, reshape
from paddle.utils import unique_name
from ...fluid.dygraph import no_grad
from paddle import _C_ops
__all__ = []
......@@ -101,6 +103,30 @@ class Orthogonal(Initializer):
flatten_shape = [max(row, col), min(row, col)]
if framework.in_dygraph_mode():
with no_grad():
place = framework._current_expected_place()
normal_var = _C_ops.final_state_gaussian_random(
flatten_shape, 0.0, 1.0, self._seed, var.dtype, place)
q, r = _C_ops.final_state_qr(normal_var, 'reduced')
r_diag = _C_ops.final_state_diag(r, 0, 0)
r_sign = _C_ops.final_state_sign(r_diag)
q = _C_ops.final_state_multiply(q, r_sign)
if row < col:
q = _C_ops.final_state_transpose(q, [1, 0])
q = _C_ops.final_state_reshape(q, var.shape)
tmp = _C_ops.final_state_scale(q, self._gain, 0.0, True)
tmp._share_underline_tensor_to(var)
return None
normal_var = block.create_var(name=unique_name.generate('.'.join(
['gaussian_random', 'tmp'])),
dtype=var.dtype,
......
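
A hedged usage sketch of the initializer whose eager branch was added above (public 2.x API paths assumed):

    import paddle

    init = paddle.nn.initializer.Orthogonal(gain=1.0)
    linear = paddle.nn.Linear(10, 15,
                              weight_attr=paddle.ParamAttr(initializer=init))
    # In eager mode the weight is built via final_state_gaussian_random -> qr ->
    # sign correction -> reshape -> scale, as in the branch above.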
......@@ -24,7 +24,7 @@ def _inplace_reshape_dygraph(x, shape):
x_shape = _varbase_creator(dtype='int64')
if in_dygraph_mode():
with paddle.fluid.dygraph.no_grad():
tmp_out, _ = _C_ops.reshape2(x, None, 'shape', shape)
tmp_out = _C_ops.final_state_reshape(x, shape)
tmp_out._share_underline_tensor_to(x)
else:
_dygraph_tracer().trace_op(type="reshape2",
......@@ -103,8 +103,7 @@ def parameters_to_vector(parameters, name=None):
out = _varbase_creator(dtype=dtype)
if in_dygraph_mode():
with paddle.fluid.dygraph.no_grad():
tmp = _varbase_creator()
_C_ops.concat(parameters, tmp, 'axis', 0)
tmp = _C_ops.final_state_concat(parameters, 0)
tmp._share_underline_tensor_to(out)
else:
_dygraph_tracer().trace_op(type='concat',
......@@ -153,11 +152,13 @@ def vector_to_parameters(vec, parameters, name=None):
numel = reduce(lambda x, y: x * y, shape)
sections.append(numel)
if len(sections) == 1:
sections.append(0)
if in_dygraph_mode():
with paddle.fluid.dygraph.no_grad():
res = [_varbase_creator() for n in range(len(parameters))]
_C_ops.split(vec, res, 'axis', 0, 'sections', sections)
for i in range(0, len(res)):
res = _C_ops.final_state_split(vec, sections, 0)
for i in range(0, len(parameters)):
res[i]._share_underline_tensor_to(parameters[i])
else:
_dygraph_tracer().trace_op(type='split',
......
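
A hedged usage sketch for the two helpers changed above (public paddle.nn.utils paths assumed):

    import paddle
    from paddle.nn.utils import parameters_to_vector, vector_to_parameters

    linear = paddle.nn.Linear(4, 3)
    vec = parameters_to_vector(linear.parameters())   # eager path: final_state_concat
    vector_to_parameters(vec, linear.parameters())    # eager path: final_state_split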
......@@ -18,6 +18,8 @@ from ...fluid import dygraph
from ...fluid import layers as F
from ...fluid.layer_helper import LayerHelper
from ...fluid.data_feeder import check_variable_and_dtype
from ...framework import in_dygraph_mode
from paddle import _C_ops
__all__ = []
......@@ -25,6 +27,12 @@ __all__ = []
def l2_norm(x, axis, epsilon=1e-12, name=None):
if len(x.shape) == 1:
axis = 0
if in_dygraph_mode():
out, norm = _C_ops.final_state_norm(x, 1 if axis is None else axis,
epsilon, False)
return paddle.squeeze(norm, axis=[axis])
check_variable_and_dtype(x, "X", ("float32", "float64"), "norm")
helper = LayerHelper("l2_normalize", **locals())
......
......@@ -16,6 +16,9 @@ from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
from ..framework import in_dygraph_mode
from paddle import _C_ops
from ..fluid.dygraph import no_grad
__all__ = []
......@@ -144,9 +147,6 @@ class Adadelta(Optimizer):
self._add_accumulator(self._avg_squared_update_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
if isinstance(param_and_grad, dict):
param_and_grad = self._update_param_group(param_and_grad)
......@@ -155,6 +155,18 @@ class Adadelta(Optimizer):
avg_squared_update_acc = self._get_accumulator(
self._avg_squared_update_acc_str, param_and_grad[0])
if in_dygraph_mode():
with no_grad():
_C_ops.final_state_adadelta_(param_and_grad[0],
param_and_grad[1],
avg_squared_grad_acc,
avg_squared_update_acc, self._rho,
self._epsilon)
return None
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
# Create the adadelta optimizer op
adadelta_op = block.append_op(type=self.type,
inputs={
......
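
For reference, the in-place kernel invoked here computes the standard Adadelta rule (Zeiler, 2012) over the parameter and the two accumulators; in the usual notation, with the squared-gradient average stored in avg_squared_grad_acc and the squared-update average in avg_squared_update_acc:

    E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2
    \Delta\theta_t = -\frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\, g_t
    E[\Delta\theta^2]_t = \rho\, E[\Delta\theta^2]_{t-1} + (1-\rho)\, \Delta\theta_t^2, \qquad \theta_{t+1} = \theta_t + \Delta\theta_t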
......@@ -17,6 +17,7 @@ from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
from paddle import _C_ops
from ..fluid.dygraph import no_grad
__all__ = []
......@@ -190,7 +191,12 @@ class Adamax(Optimizer):
beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
param_and_grad[0])
if framework._non_static_mode():
if framework.in_dygraph_mode():
_C_ops.final_state_adamax_(param_and_grad[0], param_and_grad[1],
self._create_param_lr(param_and_grad),
moment, inf_norm, beta1_pow_acc,
self._beta1, self._beta2, self._epsilon)
elif framework._in_legacy_dygraph():
_C_ops.adamax(param_and_grad[0], param_and_grad[1],
self._create_param_lr(param_and_grad), moment,
inf_norm, beta1_pow_acc, param_and_grad[0], moment,
......@@ -230,6 +236,14 @@ class Adamax(Optimizer):
for param, grad in parameters_and_grads:
if grad is None or param.stop_gradient is True:
continue
if framework.in_dygraph_mode():
beta1_pow_acc = self._get_accumulator(
self._beta1_pow_acc_str, param)
with no_grad():
tmp = _C_ops.final_state_scale(beta1_pow_acc,
self._beta1, 0.0, True)
beta1_pow_acc.copy_(tmp, False)
continue
with param.block.program._optimized_guard(
[param, grad]), name_scope('adamax'):
beta1_pow_acc = self._get_accumulator(
......@@ -243,6 +257,17 @@ class Adamax(Optimizer):
for param, grad in parameters_and_grads['params']:
if grad is None or param.stop_gradient is True:
continue
if framework.in_dygraph_mode():
beta1_pow_acc = self._get_accumulator(
self._beta1_pow_acc_str, param)
self._beta1 = parameters_and_grads.get(
'beta1', self._default_dict['beta1'])
with no_grad():
tmp = _C_ops.final_state_scale(beta1_pow_acc,
self._beta1, 0.0, True)
beta1_pow_acc.copy_(tmp, False)
continue
with param.block.program._optimized_guard(
[param, grad]), name_scope('adamax'):
beta1_pow_acc = self._get_accumulator(
......
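
Similarly, the eager Adamax path relies on the standard Adamax update (Kingma and Ba, 2015), with the first moment in moment and the infinity-norm estimate in inf_norm; _finish_update above keeps beta1_pow_acc equal to beta_1^t by scaling it by beta_1 after every step. The epsilon placement below follows Paddle's documented Adamax, not anything shown in this diff:

    m_t = \beta_1 m_{t-1} + (1-\beta_1)\, g_t, \qquad u_t = \max(\beta_2 u_{t-1}, |g_t|)
    \theta_t = \theta_{t-1} - \frac{\eta}{1-\beta_1^t}\cdot\frac{m_t}{u_t + \epsilon}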
......@@ -1597,7 +1597,7 @@ def assign(x, output=None):
# but _non_static_mode()==False under @to_static, which means
# isinstance(VarBase, Variable) == False. It will cause return None
# after this api.
if isinstance(input, (Variable, core.VarBase)):
if isinstance(input, (Variable, core.VarBase, core.eager.Tensor)):
if in_dygraph_mode():
if output is None:
output = _C_ops.final_state_assign(input)
......@@ -1622,14 +1622,16 @@ def assign(x, output=None):
# We now support the form of [var, VAR...] if the Var.shape=[1,]
if len(input.shape) > 0 and any(isinstance(x, Variable) for x in input):
# We only deal with the case where the list is nested one level, convert all scalars into variables, and then use stack to process. It is necessary to ensure the consistency of types.
if not all(
[x.shape == (1, ) for x in input if isinstance(x, Variable)]):
if not all([
x.shape == (1, ) for x in input
if isinstance(x, (Variable, core.eager.Tensor))
]):
raise TypeError(
"Unsupport paddle.assign([Variable, Variable...]) with non-scalar variable."
)
def convert_scalar(x):
if not isinstance(x, Variable):
if not isinstance(x, (Variable, core.eager.Tensor)):
return assign(x)
return x
......
......@@ -3614,6 +3614,8 @@ def strided_slice(x, axes, starts, ends, strides, name=None):
sliced_2 = paddle.strided_slice(x, axes=axes, starts=[minus_3, 0, 2], ends=ends, strides=strides_2)
# sliced_2 is x[:, 1:3:1, 0:2:1, 2:4:2].
"""
if in_dygraph_mode():
return _C_ops.final_state_strided_slice(x, axes, starts, ends, strides)
helper = LayerHelper('strided_slice', **locals())
......@@ -3657,7 +3659,7 @@ def strided_slice(x, axes, starts, ends, strides, name=None):
attrs = {'axes': axes}
infer_flags = list(1 for i in range(len(axes)))
if _non_static_mode():
if _in_legacy_dygraph():
inputs = {'Input': x}
attrs = {
'axes': axes,
......
......@@ -424,6 +424,7 @@ OP_NAMEMAPPING = {
'elementwise_sub': 'final_state_subtract',
'elementwise_mul': 'final_state_multiply',
'elementwise_div': 'final_state_divide',
'elementwise_mod': 'final_state_modulo',
}
@dygraph_only
......@@ -4466,7 +4467,7 @@ def diff(x, n=1, axis=-1, prepend=None, append=None, name=None):
dtype = x.dtype
axes = [axis]
infer_flags = list(1 for i in range(len(axes)))
if paddle.in_dynamic_mode():
if in_dygraph_mode():
has_pend = False
input_list = []
if prepend is not None and append is not None:
......@@ -4479,8 +4480,7 @@ def diff(x, n=1, axis=-1, prepend=None, append=None, name=None):
input_list = [x, append]
has_pend = True
if has_pend:
new_input = _varbase_creator()
_C_ops.concat(input_list, new_input, 'axis', axis)
new_input = _C_ops.final_state_concat(input_list, axis)
else:
new_input = x
......@@ -4493,31 +4493,59 @@ def diff(x, n=1, axis=-1, prepend=None, append=None, name=None):
attrs_1 += ('starts', starts_1)
ends_1 = [dim_len - 1]
attrs_1 += ('ends', ends_1)
if in_dygraph_mode():
input_front = _C_ops.final_state_slice(new_input, axes, starts_1, ends_1, infer_flags,
input_front = _C_ops.final_state_slice(new_input, axes, starts_1, ends_1, infer_flags,
[])
else:
input_front = _C_ops.slice(new_input, None, None, None, None, 'axes', axes, \
'infer_flags', infer_flags, *attrs_1)
starts_2 = [1]
attrs_2 += ('starts', starts_2)
ends_2 = [dim_len]
attrs_2 += ('ends', ends_2)
if in_dygraph_mode():
input_back = _C_ops.final_state_slice(new_input, axes, starts_2, ends_2, infer_flags,
input_back = _C_ops.final_state_slice(new_input, axes, starts_2, ends_2, infer_flags,
[])
if x.dtype == paddle.bool:
return _C_ops.final_state_logical_xor(input_back, input_front)
else:
input_back = _C_ops.slice(new_input, None, None, None, None, 'axes', axes, \
return elementwise_sub(input_back, input_front, axis=axis)
elif _in_legacy_dygraph():
has_pend = False
input_list = []
if prepend is not None and append is not None:
input_list = [prepend, x, append]
has_pend = True
elif prepend is not None:
input_list = [prepend, x]
has_pend = True
elif append is not None:
input_list = [x, append]
has_pend = True
if has_pend:
new_input = _varbase_creator()
_C_ops.concat(input_list, new_input, 'axis', axis)
else:
new_input = x
attrs_1 = ()
attrs_2 = ()
dim_len = new_input.shape[axis]
starts_1 = [0]
attrs_1 += ('starts', starts_1)
ends_1 = [dim_len - 1]
attrs_1 += ('ends', ends_1)
input_front = _C_ops.slice(new_input, None, None, None, None, 'axes', axes, \
'infer_flags', infer_flags, *attrs_1)
starts_2 = [1]
attrs_2 += ('starts', starts_2)
ends_2 = [dim_len]
attrs_2 += ('ends', ends_2)
input_back = _C_ops.slice(new_input, None, None, None, None, 'axes', axes, \
'infer_flags', infer_flags, *attrs_2)
if x.dtype == paddle.bool:
if in_dygraph_mode():
return _C_ops.final_state_logical_xor(input_back, input_front)
else:
return _C_ops.logical_xor(input_back, input_front)
return _C_ops.logical_xor(input_back, input_front)
else:
return elementwise_sub(input_back, input_front, axis=axis)
else:
check_variable_and_dtype(x, 'x', ['float32', 'float64', 'bool', 'int32', 'int64'], 'diff')
check_type(axis, 'axis', (int), 'diff')
......
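
A usage sketch for paddle.diff, whose eager branch now builds on final_state_concat, final_state_slice, and the final-state subtract/logical_xor ops:

    import paddle

    x = paddle.to_tensor([1, 4, 9, 16])
    paddle.diff(x)   # first-order difference -> [3, 5, 7]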
......@@ -233,7 +233,9 @@ def numel(x, name=None):
"""
if paddle.in_dynamic_mode():
if in_dygraph_mode():
return _C_ops.final_state_size(x)
elif _in_legacy_dygraph():
return _C_ops.size(x)
if not isinstance(x, Variable):
......
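
And a usage sketch for paddle.numel, which now dispatches to final_state_size in eager mode:

    import paddle

    x = paddle.full([3, 4], 1.0)
    paddle.numel(x)   # -> 12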