Unverified commit 310edc0d, authored by Leo Chen, committed by GitHub

Update layers used in ptb model to use auto-generated op functions in dygraph mode (#21724)

* update layers, test=develop

* fix input numpy, test=develop

* fix bugs, test=develop

* follow comments, test=develop

* update getitem, test=develop
Parent f17bd178
......@@ -188,7 +188,25 @@ class VarBase {
}
}
framework::proto::VarType::Type DataType() const { return data_type_; }
framework::proto::VarType::Type DataType() const {
const framework::Tensor* tensor = nullptr;
if (var_.IsInitialized()) {
if (type_ == framework::proto::VarType::LOD_TENSOR) {
tensor = &(var_.Get<framework::LoDTensor>());
} else if (type_ == framework::proto::VarType::SELECTED_ROWS) {
tensor = &(var_.Get<framework::SelectedRows>().value());
} else {
VLOG(6) << "Variable " << name_ << " is not initialized";
return data_type_;
}
}
if (tensor && tensor->IsInitialized()) {
return tensor->type();
} else {
VLOG(6) << "The tensor of variable " << name_ << " is not initialized";
return data_type_;
}
}
void ClearGradient();
......
......@@ -182,9 +182,9 @@ class Layer(core.Layer):
if parallel_helper._is_data_parallel_mode():
parallel_helper._broadcast_parameters(
self._parameters.values())
self._built = True
outputs = self.forward(*inputs, **kwargs)
self._built = True
return outputs
def forward(self, *inputs, **kwargs):
......
......@@ -216,7 +216,8 @@ def monkey_patch_math_varbase():
setattr(core.VarBase, method_name,
_elemwise_method_creator_(method_name, op_type, reverse,
scalar_method)),
scalar_method))
# b = -a
core.VarBase.__neg__ = _neg_
core.VarBase.astype = astype
......@@ -19,7 +19,7 @@ from six.moves import reduce
from .. import core
from ..layers import utils
from . import layers
from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter, _dygraph_tracer_
from ..param_attr import ParamAttr
from ..initializer import Normal, Constant, NumpyArrayInitializer
import numpy as np
......@@ -1542,18 +1542,24 @@ class Embedding(layers.Layer):
self._w = value
def forward(self, input):
attrs = {
'is_sparse': self._is_sparse,
'is_distributed': self._is_distributed,
'remote_prefetch': self._remote_prefetch,
'padding_idx': self._padding_idx
}
if in_dygraph_mode():
inputs = {'Ids': [input], 'W': [self._w]}
outs = core.ops.lookup_table_v2(inputs, attrs)
return outs['Out'][0]
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type='lookup_table_v2',
inputs={'Ids': input,
'W': self._w},
outputs={'Out': out},
attrs={
'is_sparse': self._is_sparse,
'is_distributed': self._is_distributed,
'remote_prefetch': self._remote_prefetch,
'padding_idx': self._padding_idx
})
attrs=attrs)
return out
......
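For reference, a minimal sketch of the call convention the dygraph branch above relies on: the auto-generated functions under core.ops take a dict of input lists plus an attrs dict and return a dict of output lists. The tensor shapes and attribute values below are illustrative only, and this assumes the Python API as of this commit:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core

    with fluid.dygraph.guard():
        ids = fluid.dygraph.to_variable(np.array([0, 2, 1]).astype('int64'))
        w = fluid.dygraph.to_variable(np.random.rand(4, 8).astype('float32'))
        # Inputs and outputs are {name: [variables]} dicts mirroring the op proto.
        outs = core.ops.lookup_table_v2(
            {'Ids': [ids], 'W': [w]},
            {'is_sparse': False, 'is_distributed': False,
             'remote_prefetch': False, 'padding_idx': -1})
        emb = outs['Out'][0]   # shape (3, 8)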
......@@ -208,7 +208,67 @@ def monkey_patch_varbase():
self.shape)
def __getitem__(self, item):
return _getitem_impl_(self, item)
if not isinstance(item, tuple):
item = [item]
decrease_axis = []
slice_axis = []
slice_start = []
slice_end = []
reverse_axis = []
for dim, slice_item in enumerate(item):
if isinstance(slice_item, slice):
start = slice_item.start
end = slice_item.stop
step = slice_item.step if slice_item.step else 1
assert (step == 1 or step == -1)
if step == -1:
reverse_axis.append(dim)
assert (start is None and end is None)
if start is None and end is None:
continue
if start is None:
start = 0
if end is None:
end = 10000000
slice_axis.append(dim)
slice_start.append(start)
slice_end.append(end)
else:
# int
decrease_axis.append(dim)
slice_axis.append(dim)
slice_start.append(slice_item)
slice_end.append(slice_item + 1
if slice_item != -1 else 10000000)
out = self
if len(slice_axis) > 0:
# append slice_op here
inputs = {'Input': [out]}
attrs = {
'axes': slice_axis,
'starts': slice_start,
'ends': slice_end,
'decrease_axis': decrease_axis
}
outs = core.ops.slice(inputs, attrs)
out = outs['Out'][0]
if len(reverse_axis) > 0:
inputs = {'X': [out]}
attrs = {'axis': reverse_axis}
outs = core.ops.reverse(inputs, attrs)
out = outs['Out'][0]
return out
for method_name, method in (("set_value", set_value), ("block", block),
("backward", backward), ("gradient", gradient),
......
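The patched __getitem__ above lowers basic indexing onto the slice and reverse ops: an integer index becomes a sliced axis that is then dropped via decrease_axis, a plain slice with step 1 becomes a start/end pair, and a step of -1 (with no start or stop) becomes a reverse. A small usage sketch, mirroring the new test case at the end of this diff:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        arr = np.arange(12).reshape(3, 4).astype('float32')
        var = fluid.dygraph.to_variable(arr)
        row = var[1, :]    # slice on axis 0, axis dropped via decrease_axis
        rev = var[::-1]    # step -1 maps to the reverse op on axis 0
        assert np.array_equal(row.numpy(), arr[1, :])
        assert np.array_equal(rev.numpy(), arr[::-1])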
......@@ -274,16 +274,15 @@ class GradClipByGlobalNorm(GradClipBase):
norm_global = layers.reduce_sum(norm_global)
norm_global = layers.sqrt(norm_global)
clip_scale = layers.elementwise_div(
x=self.max_global_norm,
y=layers.elementwise_max(
x=norm_global, y=self.max_global_norm))
clip_scale = self.max_global_norm / (layers.elementwise_max(
x=norm_global, y=self.max_global_norm))
for p, g in para_and_grad:
if g is None:
out.append((p, g))
continue
new_grad = layers.elementwise_mul(x=g, y=clip_scale)
new_grad = g * clip_scale
out.append((p, new_grad))
......
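The rewritten clip keeps the same formula, just expressed with operator overloads on variables: scale = max_global_norm / max(global_norm, max_global_norm), so gradients are left untouched when the global norm is already within the limit. A small numpy sketch with illustrative values:

    import numpy as np

    max_global_norm = 1.0
    grads = [np.array([0.6, 0.8]), np.array([1.0, 0.0])]
    global_norm = np.sqrt(sum((g ** 2).sum() for g in grads))         # ~1.41
    clip_scale = max_global_norm / max(global_norm, max_global_norm)  # ~0.71
    clipped = [g * clip_scale for g in grads]   # clipped global norm is 1.0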
......@@ -15,6 +15,8 @@
from __future__ import print_function
from . import framework
from . import core
from .framework import in_dygraph_mode
import numpy as np
from .wrapped_decorator import signature_safe_contextmanager
from .core import VarDesc
......
......@@ -20,7 +20,7 @@ import string
from six.moves import cStringIO
from ..proto import framework_pb2
from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype_
from ..framework import OpProtoHolder, Variable, core, convert_np_dtype_to_dtype_, in_dygraph_mode
from ..layer_helper import LayerHelper
from ..data_feeder import check_type_and_dtype
......@@ -252,9 +252,16 @@ def generate_activation_fn(op_type):
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
def func(x, name=None):
helper = LayerHelper(op_type, **locals())
if in_dygraph_mode():
inputs = {'X': [x]}
op = getattr(core.ops, op_type)
outs = op(inputs)
return outs['Out'][0]
check_type_and_dtype(x, 'x', Variable,
['float16', 'float32', 'float64'], op_type)
helper = LayerHelper(op_type, **locals())
output = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type=op_type, inputs={"X": x}, outputs={"Out": output})
return output
......
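In the dygraph branch the generated activation function skips the LayerHelper path entirely and dispatches to the matching entry on core.ops. A sketch of that dispatch, assuming 'tanh' is one of the activation ops generated this way:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.array([-1.0, 0.0, 1.0]).astype('float32'))
        op = getattr(core.ops, 'tanh')   # looked up by op_type, as in func() above
        outs = op({'X': [x]})
        y = outs['Out'][0]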
......@@ -19,7 +19,8 @@ from functools import partial, reduce
from . import nn
from .layer_function_generator import templatedoc
from ..layer_helper import LayerHelper
from ..framework import Variable
from ..framework import Variable, in_dygraph_mode
from .. import core
from ..data_feeder import check_type_and_dtype
from ..param_attr import ParamAttr
from ..initializer import NumpyArrayInitializer
......@@ -1213,6 +1214,21 @@ def softmax_with_cross_entropy(logits,
out = fluid.layers.softmax_with_cross_entropy(
logits=fc, label=label)
"""
attrs = {
'soft_label': soft_label,
'ignore_index': ignore_index,
'numeric_stable_mode': numeric_stable_mode,
'axis': axis
}
if in_dygraph_mode():
inputs = {'Logits': [logits], 'Label': [label]}
outs = core.ops.softmax_with_cross_entropy(inputs, attrs)
if not return_softmax:
return outs['Loss'][0]
else:
return outs['Loss'][0], outs['Softmax'][0]
helper = LayerHelper('softmax_with_cross_entropy', **locals())
softmax = helper.create_variable_for_type_inference(dtype=logits.dtype)
loss = helper.create_variable_for_type_inference(dtype=logits.dtype)
......@@ -1222,12 +1238,7 @@ def softmax_with_cross_entropy(logits,
'Label': label},
outputs={'Softmax': softmax,
'Loss': loss},
attrs={
'soft_label': soft_label,
'ignore_index': ignore_index,
'numeric_stable_mode': numeric_stable_mode,
'axis': axis
})
attrs=attrs)
if return_softmax:
return loss, softmax
......
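With the attrs dict hoisted above the dygraph check, the same call now serves both modes; in dygraph mode it goes straight to core.ops.softmax_with_cross_entropy. A usage sketch with illustrative shapes:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        logits = fluid.dygraph.to_variable(np.random.rand(4, 10).astype('float32'))
        label = fluid.dygraph.to_variable(
            np.random.randint(0, 10, (4, 1)).astype('int64'))
        # Returns only the loss by default, or (loss, softmax) when requested.
        loss = fluid.layers.softmax_with_cross_entropy(logits=logits, label=label)
        loss, softmax = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=label, return_softmax=True)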
......@@ -285,6 +285,7 @@ def monkey_patch_variable():
setattr(Variable, method_name,
_elemwise_method_creator_(method_name, op_type, reverse,
scalar_method))
# b = -a
Variable.__neg__ = _neg_
Variable.astype = astype
This diff has been collapsed.
......@@ -16,10 +16,11 @@ from __future__ import print_function
from six.moves import reduce
from ..layer_helper import LayerHelper
from ..param_attr import ParamAttr
from ..framework import convert_np_dtype_to_dtype_
from ..framework import convert_np_dtype_to_dtype_, in_dygraph_mode
from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc
from .. import core
from .layer_function_generator import templatedoc
from ..data_feeder import check_type_and_dtype, check_type, check_dtype, convert_dtype
import numpy
......@@ -251,7 +252,16 @@ def concat(input, axis=0, name=None):
# [11 12 13]
# [14 15 16]]
"""
helper = LayerHelper('concat', **locals())
if in_dygraph_mode():
inputs = {'X': input}
if not isinstance(axis, int):
raise TypeError(
"Input 'axis' in concat must be int in Dygraph mode.")
attrs = {'axis': axis}
outs = core.ops.concat(inputs, attrs)
return outs['Out'][0]
if not isinstance(input, list):
warnings.warn(
"The type of input in concat should be list, but received %s." %
......@@ -270,6 +280,7 @@ def concat(input, axis=0, name=None):
else:
attrs['axis'] = axis
helper = LayerHelper('concat', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
helper.append_op(
type='concat', inputs=inputs, outputs={'Out': [out]}, attrs=attrs)
......
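The dygraph branch of concat bypasses the LayerHelper and calls core.ops.concat directly; on this path axis must be a plain Python int, otherwise a TypeError is raised. A usage sketch:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        a = fluid.dygraph.to_variable(np.ones((2, 3), dtype='float32'))
        b = fluid.dygraph.to_variable(np.zeros((2, 3), dtype='float32'))
        out = fluid.layers.concat([a, b], axis=0)   # shape (4, 3)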
......@@ -31,6 +31,7 @@ from .layer_helper import LayerHelper
from .layers import ops
from .regularizer import append_regularization_ops
from .dygraph import base as imperative_base
from .dygraph import no_grad
from .dygraph.learning_rate_scheduler import LearningRateDecay
from paddle.fluid import core
from paddle.fluid.layers import tensor
......@@ -747,9 +748,20 @@ class SGDOptimizer(Optimizer):
name=name)
self.type = "sgd"
@no_grad
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
if framework.in_dygraph_mode():
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
"LearningRate": [self._create_param_lr(param_and_grad)]
}
attrs = {}
outputs = {'ParamOut': [param_and_grad[0]]}
outs = core.ops.sgd(inputs, attrs, outputs)
return outs['ParamOut'][0]
assert isinstance(block, framework.Block)
# create the optimize op
sgd_op = block.append_op(
type=self.type,
......
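Unlike the other generated ops in this change, the sgd call also passes an explicit outputs dict so the updated value is written back into the parameter in place. A sketch of that convention with illustrative values, calling core.ops.sgd directly rather than going through the optimizer:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core

    with fluid.dygraph.guard():
        param = fluid.dygraph.to_variable(np.ones(4, dtype='float32'))
        grad = fluid.dygraph.to_variable(np.full(4, 0.5, dtype='float32'))
        lr = fluid.dygraph.to_variable(np.array([0.1], dtype='float32'))
        outs = core.ops.sgd({'Param': [param], 'Grad': [grad],
                             'LearningRate': [lr]},
                            {},
                            {'ParamOut': [param]})
        new_param = outs['ParamOut'][0]   # 1.0 - 0.1 * 0.5 = 0.95 per element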
......@@ -296,6 +296,7 @@ class TestImperative(unittest.TestCase):
var_inp = fluid.dygraph.base.to_variable(np_inp)
var_inp.stop_gradient = False
l = MyLayer("my_layer")
print(var_inp)
x = l(var_inp)[0]
self.assertIsNotNone(x)
dy_out = x.numpy()
......@@ -386,12 +387,14 @@ class TestImperative(unittest.TestCase):
self.assertEqual(len(sublayers), 2)
def test_dygraph_vs_static(self):
inp1 = np.random.rand(4, 3, 3)
inp2 = np.random.rand(4, 3, 3)
np_inp1 = np.random.rand(4, 3, 3)
np_inp2 = np.random.rand(4, 3, 3)
# dynamic graph
with fluid.dygraph.guard():
if np.sum(inp1) < np.sum(inp2):
inp1 = fluid.dygraph.to_variable(np_inp1)
inp2 = fluid.dygraph.to_variable(np_inp2)
if np.sum(np_inp1) < np.sum(np_inp2):
x = fluid.layers.elementwise_add(inp1, inp2)
else:
x = fluid.layers.elementwise_sub(inp1, inp2)
......@@ -429,8 +432,8 @@ class TestImperative(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
static_result = exe.run(fluid.default_main_program(),
feed={'inp1': inp1,
'inp2': inp2},
feed={'inp1': np_inp1,
'inp2': np_inp2},
fetch_list=out)[0]
self.assertTrue(np.allclose(dygraph_result, static_result))
......
......@@ -33,6 +33,7 @@ import paddle.fluid.layers as layers
from test_imperative_base import new_program_scope
from paddle.fluid.dygraph import nn
from paddle.fluid.dygraph import base
from paddle.fluid.dygraph import to_variable
class LayerTest(unittest.TestCase):
......@@ -515,11 +516,11 @@ class TestLayer(LayerTest):
fetch_list=[ret])[0]
with self.dynamic_graph():
ret = layers.elementwise_add(n, n2)
ret = layers.elementwise_pow(ret, n3)
ret = layers.elementwise_div(ret, n4)
ret = layers.elementwise_sub(ret, n5)
dy_ret = layers.elementwise_mul(ret, n6)
ret = layers.elementwise_add(to_variable(n), to_variable(n2))
ret = layers.elementwise_pow(ret, to_variable(n3))
ret = layers.elementwise_div(ret, to_variable(n4))
ret = layers.elementwise_sub(ret, to_variable(n5))
dy_ret = layers.elementwise_mul(ret, to_variable(n6))
dy_ret_value = dy_ret.numpy()
self.assertTrue(np.allclose(static_ret, dy_ret_value))
......@@ -528,8 +529,8 @@ class TestLayer(LayerTest):
n2 = np.ones([3, 3], dtype='float32') * 2
with self.dynamic_graph():
min_ret = layers.elementwise_min(n, n2)
max_ret = layers.elementwise_max(n, n2)
min_ret = layers.elementwise_min(to_variable(n), to_variable(n2))
max_ret = layers.elementwise_max(to_variable(n), to_variable(n2))
min_ret_value = min_ret.numpy()
max_ret_value = max_ret.numpy()
......
......@@ -100,6 +100,7 @@ class TestVarBase(unittest.TestCase):
with fluid.dygraph.guard():
var = fluid.dygraph.to_variable(self.array)
self.assertTrue(np.array_equal(var[1, :].numpy(), self.array[1, :]))
self.assertTrue(np.array_equal(var[::-1].numpy(), self.array[::-1]))
def test_var_base_to_np(self):
with fluid.dygraph.guard():
......