Unverified commit 970db874 authored by 201716010711, committed by GitHub

transfer scale api (#48356)

Parent 128ef1ae
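This commit replaces call sites of fluid.layers.scale with the public paddle.scale API and removes the old layer definition from fluid.layers.nn. A minimal sketch of the call-site migration, assuming the public paddle.scale API; the tensor below is illustrative and not taken from the diff:

import paddle

x = paddle.randn(shape=[2, 3], dtype='float32')

# Old call path removed by this commit:
#   out = paddle.fluid.layers.scale(x, scale=2.0, bias=1.0)
# New public API used instead (default bias_after_scale=True computes scale * x + bias):
out = paddle.scale(x, scale=2.0, bias=1.0)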
......@@ -36,6 +36,6 @@ from paddle.fluid.data_feeder import ( # noqa: F401
check_variable_and_dtype,
convert_dtype,
)
from paddle.fluid.layers import fill_constant, utils, scale # noqa: F401
from paddle.fluid.layers import fill_constant, utils # noqa: F401
from paddle.tensor.layer_function_generator import templatedoc # noqa: F401
import paddle.fluid as fluid # noqa: F401
......@@ -522,7 +522,7 @@ class _ProgramHolder:
with framework.program_guard(program):
for i, out in enumerate(self._output_descs):
var = program.global_block().var(out.name())
var = nn.scale(
var = paddle.scale(
var, 1.0, name="translated_layer/scale_{}".format(i)
)
scale_output_vars.append(var)
......
......@@ -189,6 +189,7 @@ class ListenAndServ:
.. code-block:: python
import paddle.fluid as fluid
import paddle
with fluid.program_guard(main):
serv = layers.ListenAndServ(
"127.0.0.1:6170", ["X"], optimizer_mode=False)
......@@ -199,7 +200,7 @@ class ListenAndServ:
name="X",
append_batch_size=False)
fluid.initializer.Constant(value=1.0)(x, main.global_block())
layers.scale(x=x, scale=10.0, out=out_var)
paddle.scale(x=x, scale=10.0, out=out_var)
exe = fluid.Executor(place)
exe.run(main)
......
......@@ -113,7 +113,6 @@ __all__ = [
'flatten',
'unique',
'unique_with_counts',
'scale',
'elementwise_add',
'elementwise_div',
'elementwise_sub',
......@@ -7924,103 +7923,6 @@ def _elementwise_op(helper):
return helper.append_activation(out)
def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
"""
Putting scale and bias to the input Tensor as following:
``bias_after_scale`` is True:
.. math::
Out=scale*X+bias
``bias_after_scale`` is False:
.. math::
Out=scale*(X+bias)
Args:
x(Tensor): Input N-D Tensor of scale operator. Data type can be float32, float64, int8, int16, int32, int64, uint8.
scale(float|Tensor): The scale factor of the input, it should be a float number or a Tensor with shape [1] and data type as float32.
bias(float): The bias to be put on the input.
bias_after_scale(bool): Apply bias addition after or before scaling. It is useful for numeric stability in some circumstances.
act(str, optional): Activation applied to the output such as tanh, softmax, sigmoid, relu.
name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`
Returns:
Tensor: Output tensor of scale operator, with shape and data type same as input.
Examples:
.. code-block:: python
# scale as a float32 number
import paddle
data = paddle.randn(shape=[2,3], dtype='float32')
res = paddle.scale(data, scale=2.0, bias=1.0)
.. code-block:: python
# scale with parameter scale as a Tensor
import paddle
data = paddle.randn(shape=[2, 3], dtype='float32')
factor = paddle.to_tensor([2], dtype='float32')
res = paddle.scale(data, scale=factor, bias=1.0)
"""
if in_dygraph_mode():
out = _C_ops.scale(x, scale, float(bias), bias_after_scale)
return dygraph_utils._append_activation_in_dygraph(out)
if _non_static_mode():
_scale = scale.numpy().item(0) if isinstance(scale, Variable) else scale
out = _legacy_C_ops.scale(
x,
'scale',
float(_scale),
'bias',
float(bias),
'bias_after_scale',
bias_after_scale,
)
return dygraph_utils._append_activation_in_dygraph(out)
check_variable_and_dtype(
x,
"x",
[
'float16',
'uint16',
'float32',
'float64',
'int8',
'int16',
'int32',
'int64',
'uint8',
],
"scale",
)
inputs = {'X': [x]}
attrs = {
'bias': float(bias),
'bias_after_scale': bias_after_scale,
}
if isinstance(scale, Variable):
inputs['ScaleTensor'] = [scale]
else:
attrs['scale'] = float(scale)
helper = LayerHelper('scale', **locals())
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type='scale', inputs=inputs, outputs={'Out': out}, attrs=attrs
)
return helper.append_activation(out)
def elementwise_add(x, y, axis=-1, act=None, name=None):
"""
......
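For reference, a short sketch of the semantics described in the removed fluid.layers.scale docstring, which paddle.scale now provides; the input values below are illustrative:

import paddle

x = paddle.to_tensor([[1.0, 2.0, 3.0]])

# bias_after_scale=True (default): Out = scale * X + bias
y1 = paddle.scale(x, scale=2.0, bias=1.0)                          # [[3., 5., 7.]]

# bias_after_scale=False: Out = scale * (X + bias)
y2 = paddle.scale(x, scale=2.0, bias=1.0, bias_after_scale=False)  # [[4., 6., 8.]]

# scale may also be passed as a 1-element float32 Tensor
factor = paddle.to_tensor([2.0], dtype='float32')
y3 = paddle.scale(x, scale=factor, bias=1.0)                       # [[3., 5., 7.]]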
......@@ -620,7 +620,7 @@ def scaled_dot_product_attention(
v = __split_heads(v, num_heads)
key_dim_per_head = keys.shape[-1] // num_heads
scaled_q = layers.scale(x=q, scale=key_dim_per_head**-0.5)
scaled_q = paddle.scale(x=q, scale=key_dim_per_head**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
x = paddle.reshape(x=product, shape=[-1, product.shape[-1]])
......
......@@ -162,7 +162,7 @@ def model():
# need cos sim
inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
scale_infer = layers.scale(x=inference, scale=5.0)
scale_infer = paddle.scale(x=inference, scale=5.0)
label = layers.data(name='score', shape=[1], dtype='float32')
square_cost = layers.square_error_cost(input=scale_infer, label=label)
......
......@@ -537,7 +537,7 @@ class PrepareEncoderDecoderLayer(Layer):
def forward(self, src_word, src_pos):
src_word_emb = self._input_emb(src_word)
src_word_emb = fluid.layers.scale(
src_word_emb = paddle.scale(
x=src_word_emb, scale=self._src_emb_dim**0.5
)
# # TODO change this to fit dynamic length input
......
......@@ -1173,7 +1173,7 @@ def multi_head_attention(
"""
Scaled Dot-Product Attention
"""
scaled_q = layers.scale(x=q, scale=d_model**-0.5)
scaled_q = paddle.scale(x=q, scale=d_model**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
......@@ -1305,7 +1305,7 @@ def prepare_encoder(
),
)
src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
src_word_emb = paddle.scale(x=src_word_emb, scale=src_emb_dim**0.5)
src_pos_enc = layers.embedding(
src_pos,
size=[src_max_len, src_emb_dim],
......
......@@ -276,7 +276,7 @@ class BertModelLayer(Layer):
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True
)
self_attn_mask = fluid.layers.scale(
self_attn_mask = paddle.scale(
x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False
)
n_head_self_attn_mask = paddle.stack(
......
......@@ -342,7 +342,7 @@ class WrapEncoder(Layer):
def forward(self, src_word, src_pos, src_slf_attn_bias):
word_emb = self.word_embedder(src_word)
word_emb = layers.scale(x=word_emb, scale=self.emb_dim**0.5)
word_emb = paddle.scale(x=word_emb, scale=self.emb_dim**0.5)
pos_enc = self.pos_encoder(src_pos)
pos_enc.stop_gradient = True
emb = word_emb + pos_enc
......@@ -546,7 +546,7 @@ class WrapDecoder(Layer):
caches=None,
):
word_emb = self.word_embedder(trg_word)
word_emb = layers.scale(x=word_emb, scale=self.emb_dim**0.5)
word_emb = paddle.scale(x=word_emb, scale=self.emb_dim**0.5)
pos_enc = self.pos_encoder(trg_pos)
pos_enc.stop_gradient = True
emb = word_emb + pos_enc
......
......@@ -55,7 +55,7 @@ class TestBase(IPUOpTest):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
)
out = paddle.fluid.layers.scale(x, **self.attrs)
out = paddle.scale(x, **self.attrs)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......@@ -126,7 +126,7 @@ class TestCase5(TestBase):
y = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.scale(x, scale=y, **self.attrs)
out = paddle.scale(x, scale=y, **self.attrs)
self.fetch_list = [out.name]
......
......@@ -62,9 +62,9 @@ class TestBase(IPUOpTest):
add1 = paddle.fluid.layers.elementwise_add(x, x)
reshape = paddle.reshape(add1, **self.attrs)
add2 = paddle.fluid.layers.elementwise_add(reshape, reshape)
scale1 = paddle.fluid.layers.scale(add2)
scale2 = paddle.fluid.layers.scale(scale1, scale=1.3, bias=0.5)
scale3 = paddle.fluid.layers.scale(scale2, scale=2, bias=0.7)
scale1 = paddle.scale(add2)
scale2 = paddle.scale(scale1, scale=1.3, bias=0.5)
scale3 = paddle.scale(scale2, scale=2, bias=0.7)
fetch_list = [scale3.name]
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
......@@ -39,7 +40,7 @@ class TRTScaleTest(InferencePassTest):
self.fetch_list = [out]
def append_scale(self, data):
return fluid.layers.scale(
return paddle.scale(
x=data, scale=2.0, bias=-1.0, bias_after_scale=False
)
......@@ -71,7 +72,7 @@ class TRTScaleShape2Test(InferencePassTest):
self.fetch_list = [out]
def append_scale(self, data):
return fluid.layers.scale(
return paddle.scale(
x=data, scale=2.0, bias=-1.0, bias_after_scale=False
)
......
......@@ -207,7 +207,7 @@ class FusionGroupPassFillConstantTest(FusionGroupPassTest):
tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1])
tmp_1 = layers.fill_constant(shape=[2, 2], dtype=dtype, value=2.0)
tmp_2 = layers.scale(
tmp_2 = paddle.scale(
tmp_1, scale=3.0, bias=1.0, bias_after_scale=True
)
tmp_3 = layers.elementwise_mul(tmp_2, tmp_0)
......
......@@ -131,7 +131,7 @@ class TestScaleOpSelectedRows(unittest.TestCase):
class TestScaleRaiseError(unittest.TestCase):
def test_errors(self):
def test_type():
fluid.layers.scale([10])
paddle.scale([10])
self.assertRaises(TypeError, test_type)
......
......@@ -80,7 +80,7 @@ class TestArrayReadWrite(unittest.TestCase):
self.assertEqual(outs[0], outs[1])
total_sum = layers.sums(input=[a_sum, x_sum])
total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)
total_sum_scaled = paddle.scale(x=total_sum, scale=1 / 6.0)
append_backward(total_sum_scaled)
......@@ -117,7 +117,7 @@ class TestArrayReadWrite(unittest.TestCase):
total_sum_dygraph = layers.sums(
input=[a_sum_dygraph, x_sum_dygraph]
)
total_sum_scaled_dygraph = layers.scale(
total_sum_scaled_dygraph = paddle.scale(
x=total_sum_dygraph, scale=1 / 6.0
)
total_sum_scaled_dygraph.backward()
......
......@@ -29,6 +29,7 @@ import paddle.fluid.layers.ops as ops
from dist_test_utils import remove_ps_flag
from paddle.fluid import core
import paddle
RPC_OP_ROLE_ATTR_NAME = (
op_role_attr_name
......@@ -150,7 +151,7 @@ class TestSendOp(unittest.TestCase):
append_batch_size=False,
)
fluid.initializer.Constant(value=2.3)(x, main.global_block())
o = layers.scale(x=x, scale=10.0)
o = paddle.scale(x=x, scale=10.0)
exe = fluid.Executor(place)
self.local_out = exe.run(main, fetch_list=[o])
......
......@@ -155,7 +155,7 @@ class EagerDeletionRecurrentOpTest1(unittest.TestCase):
h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x)
h = layers.scale(
h = paddle.scale(
x=layers.elementwise_add(x=h_pre, y=x_t),
scale=self.py_rnn.scale,
)
......@@ -431,8 +431,8 @@ class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1):
h_pre2 = rnn.memory(init=h_boot2)
x_t = rnn.step_input(x)
mem1 = layers.scale(x=h_pre1, scale=1.0)
mem2 = layers.scale(x=h_pre2, scale=1.0)
mem1 = paddle.scale(x=h_pre1, scale=1.0)
mem2 = paddle.scale(x=h_pre2, scale=1.0)
out = layers.sums(input=[mem1, x_t, mem2])
rnn.update_memory(h_pre1, mem1)
......@@ -691,7 +691,7 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest(
h_pre = forward_only_rnn.memory(init=h_boot)
x_t = forward_only_rnn.step_input(x)
h = layers.scale(
h = paddle.scale(
x=layers.elementwise_add(x=h_pre, y=x_t),
scale=self.py_rnn.scale,
)
......@@ -707,7 +707,7 @@ class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest(
h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x)
h = layers.scale(
h = paddle.scale(
x=layers.elementwise_add(x=h_pre, y=x_t),
scale=self.py_rnn.scale,
)
......
......@@ -692,7 +692,7 @@ class PrepareEncoderDecoderLayer(Layer):
def forward(self, src_word, src_pos):
src_word_emb = self._input_emb(src_word)
src_word_emb = fluid.layers.scale(
src_word_emb = paddle.scale(
x=src_word_emb, scale=self._src_emb_dim**0.5
)
# # TODO change this to fit dynamic length input
......
......@@ -3632,7 +3632,7 @@ class TestBook(LayerTest):
dtype='float32',
append_batch_size=False,
)
out = layers.scale(input, scale=scale_var)
out = paddle.scale(input, scale=scale_var)
return out
def make_iou_similarity(self):
......
......@@ -151,7 +151,7 @@ class RecurrentOpTest1(unittest.TestCase):
h_pre = rnn.memory(init=h_boot)
x_t = rnn.step_input(x)
h = layers.scale(
h = paddle.scale(
x=layers.elementwise_add(x=h_pre, y=x_t),
scale=self.py_rnn.scale,
)
......@@ -419,8 +419,8 @@ class RecurrentOpMultipleMemoryTest(RecurrentOpTest1):
h_pre2 = rnn.memory(init=h_boot2)
x_t = rnn.step_input(x)
mem1 = layers.scale(x=h_pre1, scale=1.0)
mem2 = layers.scale(x=h_pre2, scale=1.0)
mem1 = paddle.scale(x=h_pre1, scale=1.0)
mem2 = paddle.scale(x=h_pre2, scale=1.0)
out = layers.sums(input=[mem1, x_t, mem2])
rnn.update_memory(h_pre1, mem1)
......
......@@ -133,7 +133,7 @@ class TestScaleOpSelectedRows(unittest.TestCase):
class TestScaleRaiseError(unittest.TestCase):
def test_errors(self):
def test_type():
fluid.layers.scale([10])
paddle.scale([10])
self.assertRaises(TypeError, test_type)
......
......@@ -161,7 +161,7 @@ def multi_head_attention(
sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False)
return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
scaled_q = layers.scale(x=q, scale=d_model**-0.5)
scaled_q = paddle.scale(x=q, scale=d_model**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
weights = __softmax(layers.elementwise_add(x=product, y=attn_bias))
if dropout_rate:
......
......@@ -26,7 +26,6 @@ from paddle.fluid import (
CompiledProgram,
default_main_program,
Program,
layers,
unique_name,
program_guard,
)
......@@ -201,7 +200,7 @@ def normalize_program(program, feed_vars, fetch_vars):
uniq_fetch_vars = []
for i, var in enumerate(fetch_vars):
if var.dtype != paddle.bool:
var = layers.scale(
var = paddle.scale(
var, 1.0, name="save_infer_model/scale_{}".format(i)
)
uniq_fetch_vars.append(var)
......
......@@ -52,7 +52,7 @@ __inplace_unary_func__ = [
__all__ = []
# It is a hot fix in some unittest using:
# fluid.layers.scale(x=x, scale=10.0, out=out_var)
# paddle.scale(x=x, scale=10.0, out=out_var)
# e.g.: test_program_code.py, test_dist_train.py
globals()['_scale'] = generate_layer_fn('scale')
......