Unverified · Commit 364b0b0a authored by wangzhen38, committed by GitHub

[remove fluid] under unittests of linear api (#48564)

* [remove fluid] under unittests of linear api

* [remove fluid] under unittests of linear api

* [remove fluid] under unittests of linear api

* [remove fluid] under unittests of linear api

* [remove fluid] under unittests of linear api

* [remove fluid] under unittests of linear api

* [remove fluid] fluid dygraph linear api

* [remove fluid] fluid dygraph linear api

* [remove fluid] fluid dygraph linear api
Parent 33fa2684
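The hunks below replace the deprecated paddle.fluid.dygraph Linear layer with paddle.nn.Linear in docstrings and unit tests, and swap fluid.layers.reduce_mean for paddle.mean. A minimal sketch of the migrated call pattern (the layer sizes and the random input below are illustrative, not taken from the diff):

    import paddle

    # paddle.nn.Linear takes positional (in_features, out_features); the old
    # input_dim/output_dim keywords and the fused `act` argument are gone.
    linear = paddle.nn.Linear(10, 10)
    x = paddle.rand([4, 10])
    out = linear(x)
    loss = paddle.mean(out)  # replaces fluid.layers.reduce_mean(out)
    loss.backward()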
@@ -91,7 +91,7 @@ def group_sharded_parallel(
  # required: distributed
  import paddle
- from paddle.fluid.dygraph.nn import Linear
+ from paddle.nn import Linear
  from paddle.distributed import fleet
  from paddle.distributed.sharding import group_sharded_parallel
@@ -238,7 +238,7 @@ def save_group_sharded_model(model, output, optimizer=None):
  # required: distributed
  import paddle
- from paddle.fluid.dygraph.nn import Linear
+ from paddle.nn import Linear
  from paddle.distributed import fleet
  from paddle.distributed.sharding import group_sharded_parallel, save_group_sharded_model
......
@@ -23,7 +23,7 @@ from paddle.optimizer import Adam
  from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
  from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
  from paddle.nn import Sequential
- from paddle.fluid.dygraph import Linear
+ from paddle.nn import Linear
  from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose
  from paddle.fluid.log_helper import get_logger
  from paddle.fluid.framework import _test_eager_guard
@@ -111,7 +111,7 @@ class ModelForConv2dT(nn.Layer):
      def __init__(self, num_classes=10):
          super().__init__()
          self.features = nn.Conv2DTranspose(4, 6, (3, 3))
-         self.fc = Linear(input_dim=600, output_dim=num_classes)
+         self.fc = Linear(600, num_classes)
      def forward(self, inputs):
          x = self.features(inputs)
@@ -143,11 +143,9 @@ class ImperativeLenet(paddle.nn.Layer):
          )
          self.fc = Sequential(
-             Linear(input_dim=400, output_dim=120),
-             Linear(input_dim=120, output_dim=84),
-             Linear(
-                 input_dim=84, output_dim=num_classes, act=classifier_activation
-             ),
+             Linear(400, 120),
+             Linear(120, 84),
+             Linear(84, num_classes),
          )
      def forward(self, inputs):
......
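The old fluid Linear could fuse an activation through `act=classifier_activation`; paddle.nn.Linear has no `act` argument, so any classifier activation has to be added as its own layer. A hedged sketch, assuming the dropped activation was a softmax (the actual classifier_activation value is not visible in this diff):

    from paddle.nn import Linear, Sequential, Softmax

    # Softmax stands in for whatever classifier_activation was; with
    # paddle.nn.Linear the activation is a separate layer in the Sequential.
    fc = Sequential(
        Linear(400, 120),
        Linear(120, 84),
        Linear(84, 10),  # 10 stands in for num_classes
        Softmax(),
    )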
@@ -821,11 +821,12 @@ class ReduceLROnPlateau(LearningRateDecay):
      .. code-block:: python
          import paddle.fluid as fluid
+         import paddle
          import numpy as np
          with fluid.dygraph.guard():
              x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-             linear = fluid.dygraph.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              input = fluid.dygraph.to_variable(x)
              reduce_lr = fluid.dygraph.ReduceLROnPlateau(
@@ -842,7 +843,7 @@ class ReduceLROnPlateau(LearningRateDecay):
              total_loss = 0
              for bath_id in range(5):
                  out = linear(input)
-                 loss = fluid.layers.reduce_mean(out)
+                 loss = paddle.mean(out)
                  total_loss += loss
                  adam.minimize(loss)
@@ -1090,9 +1091,10 @@ class StepDecay(_LearningRateEpochDecay):
          import paddle.fluid as fluid
          import numpy as np
+         import paddle
          with fluid.dygraph.guard():
              x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-             linear = fluid.dygraph.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              input = fluid.dygraph.to_variable(x)
              scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
              adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
@@ -1100,7 +1102,7 @@ class StepDecay(_LearningRateEpochDecay):
              for epoch in range(9):
                  for batch_id in range(5):
                      out = linear(input)
-                     loss = fluid.layers.reduce_mean(out)
+                     loss = paddle.mean(out)
                      adam.minimize(loss)
                  scheduler.epoch()
@@ -1170,9 +1172,10 @@ class MultiStepDecay(_LearningRateEpochDecay):
          import paddle.fluid as fluid
          import numpy as np
+         import paddle
          with fluid.dygraph.guard():
              x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-             linear = fluid.dygraph.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              input = fluid.dygraph.to_variable(x)
              scheduler = fluid.dygraph.MultiStepDecay(0.5, milestones=[3, 5])
              adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
@@ -1180,7 +1183,7 @@ class MultiStepDecay(_LearningRateEpochDecay):
              for epoch in range(6):
                  for batch_id in range(5):
                      out = linear(input)
-                     loss = fluid.layers.reduce_mean(out)
+                     loss = paddle.mean(out)
                      adam.minimize(loss)
                  scheduler.epoch()
@@ -1255,9 +1258,10 @@ class LambdaDecay(_LearningRateEpochDecay):
          import paddle.fluid as fluid
          import numpy as np
+         import paddle
          with fluid.dygraph.guard():
              x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
-             linear = fluid.dygraph.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              input = fluid.dygraph.to_variable(x)
              scheduler = fluid.dygraph.LambdaDecay(0.5, lr_lambda=lambda x: 0.95**x)
              adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
@@ -1265,7 +1269,7 @@ class LambdaDecay(_LearningRateEpochDecay):
              for epoch in range(6):
                  for batch_id in range(5):
                      out = linear(input)
-                     loss = fluid.layers.reduce_mean(out)
+                     loss = paddle.mean(out)
                      adam.minimize(loss)
                  scheduler.epoch()
......
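Assembled from the visible context lines of the StepDecay hunks above, the updated docstring example reads roughly as follows; the lines elided between the two hunks are not reproduced, so the backward step below is an assumption added only to make the sketch runnable in dygraph:

    import paddle.fluid as fluid
    import numpy as np
    import paddle

    with fluid.dygraph.guard():
        x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
        linear = paddle.nn.Linear(10, 10)
        input = fluid.dygraph.to_variable(x)
        scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
        adam = fluid.optimizer.Adam(learning_rate=scheduler,
                                    parameter_list=linear.parameters())
        for epoch in range(9):
            for batch_id in range(5):
                out = linear(input)
                loss = paddle.mean(out)
                loss.backward()  # assumption: gradients are needed before minimize
                adam.minimize(loss)
            scheduler.epoch()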
This diff is collapsed.
@@ -165,12 +165,12 @@ def monkey_patch_varbase():
          import paddle.fluid as fluid
          from paddle.fluid.dygraph.base import to_variable
-         from paddle.fluid.dygraph import Linear
+         from paddle.nn import Linear
          import numpy as np
          data = np.ones([3, 1024], dtype='float32')
          with fluid.dygraph.guard():
-             linear = fluid.dygraph.Linear(1024, 4)
+             linear = Linear(1024, 4)
              t = to_variable(data)
              linear(t)  # call with default weight
              custom_weight = np.random.randn(1024, 4).astype("float32")
......
@@ -39,8 +39,10 @@ __all__ = ['run_check']
  class SimpleLayer(Layer):
      def __init__(self, input_size):
          super().__init__()
-         self._linear1 = nn.Linear(
-             input_size, 3, param_attr=ParamAttr(initializer=Constant(value=0.1))
+         self._linear1 = paddle.nn.Linear(
+             input_size,
+             3,
+             weight_attr=ParamAttr(initializer=Constant(value=0.1)),
          )
      def forward(self, inputs):
......
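Besides moving to paddle.nn.Linear, the hunk above renames param_attr to weight_attr, which is the keyword paddle.nn.Linear uses for the weight's ParamAttr. A minimal sketch of the new construction (the input size 8 is illustrative, and the imports assume the paddle 2.x public aliases rather than whatever run_check itself imports):

    import paddle
    from paddle import ParamAttr
    from paddle.nn.initializer import Constant

    # weight_attr (and bias_attr, if needed) replaces the old param_attr keyword.
    linear = paddle.nn.Linear(
        8,
        3,
        weight_attr=ParamAttr(initializer=Constant(value=0.1)),
    )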
@@ -475,9 +475,10 @@ class Optimizer:
      .. code-block:: python
          import paddle.fluid as fluid
+         import paddle
          with fluid.dygraph.guard():
-             linear = fluid.dygraph.nn.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              adam = fluid.optimizer.Adam(0.1, parameter_list=linear.parameters())
@@ -576,6 +577,7 @@ class Optimizer:
          import paddle.fluid as fluid
          import numpy as np
+         import paddle
          # example1: LearningRateDecay is not used, return value is all the same
          with fluid.dygraph.guard():
@@ -587,10 +589,10 @@ class Optimizer:
          # example2: PiecewiseDecay is used, return the step learning rate
          with fluid.dygraph.guard():
              inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
-             linear = fluid.dygraph.nn.Linear(10, 10)
+             linear = paddle.nn.Linear(10, 10)
              inp = fluid.dygraph.to_variable(inp)
              out = linear(inp)
-             loss = fluid.layers.reduce_mean(out)
+             loss = paddle.mean(out)
              bd = [2, 4, 6, 8]
              value = [0.2, 0.4, 0.6, 0.8, 1.0]
@@ -1340,12 +1342,13 @@ class Optimizer:
      .. code-block:: python
          import paddle.fluid as fluid
+         import paddle
          import numpy as np
          with fluid.dygraph.guard():
              value = np.arange(26).reshape(2, 13).astype("float32")
              a = fluid.dygraph.to_variable(value)
-             linear = fluid.Linear(13, 5, dtype="float32")
+             linear = paddle.nn.Linear(13, 5)
              # This can be any optimizer supported by dygraph.
              adam = fluid.optimizer.Adam(learning_rate = 0.01,
                                          parameter_list = linear.parameters())
......
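The optimizer docstrings above keep the fluid Adam optimizer while the layer itself now comes from paddle.nn; parameter_list=linear.parameters() still works because paddle.nn layers expose parameters() the same way the fluid layers did. A sketch assembled from the visible lines, with an illustrative forward/backward step appended (those trailing lines are elided in the diff):

    import paddle.fluid as fluid
    import paddle
    import numpy as np

    with fluid.dygraph.guard():
        value = np.arange(26).reshape(2, 13).astype("float32")
        a = fluid.dygraph.to_variable(value)
        linear = paddle.nn.Linear(13, 5)
        # This can be any optimizer supported by dygraph.
        adam = fluid.optimizer.Adam(learning_rate=0.01,
                                    parameter_list=linear.parameters())
        out = linear(a)          # illustrative, not part of the visible hunk
        loss = paddle.mean(out)  # illustrative, not part of the visible hunk
        loss.backward()
        adam.minimize(loss)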
@@ -18,7 +18,7 @@ from test_dist_base import TestParallelDyGraphRunnerBase, runtime_main
  import paddle
  import paddle.fluid as fluid
  import paddle.nn.functional as F
- from paddle.fluid.dygraph import Embedding, Layer, Linear, to_variable
+ from paddle.fluid.dygraph import Embedding, Layer, to_variable
  from paddle.optimizer.lr import NoamDecay
"""
@@ -269,8 +269,8 @@ class PrePostProcessLayer(Layer):
  class PositionwiseFeedForwardLayer(Layer):
      def __init__(self, d_inner_hid, d_hid, dropout_rate):
          super().__init__()
-         self._i2h = Linear(d_hid, d_inner_hid, act="relu")
-         self._h2o = Linear(d_inner_hid, d_hid)
+         self._i2h = paddle.nn.Linear(d_hid, d_inner_hid)
+         self._h2o = paddle.nn.Linear(d_inner_hid, d_hid)
          self._dropout_rate = dropout_rate
      def forward(self, x):
@@ -304,10 +304,18 @@ class MultiHeadAttentionLayer(Layer):
          self._d_value = d_value
          self._d_model = d_model
          self._dropout_rate = dropout_rate
-         self._q_fc = Linear(self._d_model, d_key * n_head, bias_attr=False)
-         self._k_fc = Linear(self._d_model, d_key * n_head, bias_attr=False)
-         self._v_fc = Linear(self._d_model, d_value * n_head, bias_attr=False)
-         self._proj_fc = Linear(d_value * n_head, self._d_model, bias_attr=False)
+         self._q_fc = paddle.nn.Linear(
+             self._d_model, d_key * n_head, bias_attr=False
+         )
+         self._k_fc = paddle.nn.Linear(
+             self._d_model, d_key * n_head, bias_attr=False
+         )
+         self._v_fc = paddle.nn.Linear(
+             self._d_model, d_value * n_head, bias_attr=False
+         )
+         self._proj_fc = paddle.nn.Linear(
+             d_value * n_head, self._d_model, bias_attr=False
+         )
      def forward(self, queries, keys, values, attn_bias):
          # compute q ,k ,v
@@ -825,7 +833,9 @@ class WrapDecoderLayer(Layer):
          )
          self._weight_sharing = weight_sharing
          if not weight_sharing:
-             self._fc = Linear(d_model, trg_vocab_size, bias_attr=False)
+             self._fc = paddle.nn.Linear(
+                 d_model, trg_vocab_size, bias_attr=False
+             )
      def forward(self, dec_inputs=None, enc_output=None):
          trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias = dec_inputs
......
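Dropping act="relu" from _i2h means the relu now has to be applied explicitly in the feed-forward layer's forward pass; that forward body is not shown in this diff, so the version below is a hedged sketch of how it could look after the change (F is paddle.nn.functional, which this test file already imports):

    import paddle
    import paddle.nn.functional as F
    from paddle.fluid.dygraph import Layer

    class PositionwiseFeedForwardLayer(Layer):
        def __init__(self, d_inner_hid, d_hid, dropout_rate):
            super().__init__()
            self._i2h = paddle.nn.Linear(d_hid, d_inner_hid)
            self._h2o = paddle.nn.Linear(d_inner_hid, d_hid)
            self._dropout_rate = dropout_rate

        def forward(self, x):
            # relu is applied here instead of via the removed act="relu"
            hidden = F.relu(self._i2h(x))
            if self._dropout_rate:
                hidden = F.dropout(hidden, p=self._dropout_rate)
            return self._h2o(hidden)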