Commit 0718113a authored by S sneaxiy

modification

Parent d9942cd1
@@ -130,7 +130,7 @@ paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(...))
 paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
 paddle.fluid.layers.l2_normalize ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None))
-paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'scale', 'bias', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, 0.0, None))
+paddle.fluid.layers.matmul ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None))
 paddle.fluid.layers.topk ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.warpctc ArgSpec(args=['input', 'label', 'blank', 'norm_by_times'], varargs=None, keywords=None, defaults=(0, False))
 paddle.fluid.layers.sequence_reshape ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None)
...
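For orientation, a minimal usage sketch of the updated Python signature (variable names and shapes are illustrative, not taken from the commit):

```python
import paddle.fluid as fluid

# alpha replaces the former scale/bias pair, so Out = alpha * (X . Y)
# with no additive bias term
x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32', append_batch_size=False)
y = fluid.layers.data(name='y', shape=[3, 4], dtype='float32', append_batch_size=False)
out = fluid.layers.matmul(x, y, transpose_x=False, transpose_y=False, alpha=2.0)
```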
@@ -93,7 +93,6 @@ class ElementwiseMulGradKernel : public ElemwiseGradKernel<T> {
     auto* x = ctx.Input<Tensor>("X");
     auto* y = ctx.Input<Tensor>("Y");
-    // auto* out = ctx.Input<Tensor>("Out");
     auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
     auto* out = dout;  // out is not necessary
     auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
...
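The forward output can be dropped here because the multiply gradient only needs the two inputs and the upstream gradient; a small NumPy illustration of that identity (not part of the commit):

```python
import numpy as np

x = np.random.rand(2, 3).astype('float32')
y = np.random.rand(2, 3).astype('float32')
dout = np.random.rand(2, 3).astype('float32')

# for Out = X * Y (elementwise): dX = dOut * Y and dY = dOut * X,
# so neither expression ever touches Out itself
dx = dout * y
dy = dout * x
```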
@@ -59,9 +59,8 @@ class MatMulKernel : public framework::OpKernel<T> {
         RowMatrixFromVector(x.dims()), 0, context.Attr<bool>("transpose_X"));
     auto mat_dim_b = math::CreateMatrixDescriptor(
         ColumnMatrixFromVector(y.dims()), 0, context.Attr<bool>("transpose_Y"));
-    auto scale = static_cast<T>(context.Attr<float>("scale"));
-    auto bias = static_cast<T>(context.Attr<float>("bias"));
-    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, bias);
+    auto scale = static_cast<T>(context.Attr<float>("alpha"));
+    blas.MatMul(x, mat_dim_a, y, mat_dim_b, scale, out, T(0));
   }
 };
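What the forward kernel now computes, written as a NumPy sketch of the alpha semantics (it ignores the vector and batched cases that CreateMatrixDescriptor handles, and is not the actual Blas call):

```python
import numpy as np

def matmul_forward(x, y, transpose_x=False, transpose_y=False, alpha=1.0):
    # mirrors blas.MatMul(..., scale, out, T(0)): Out = alpha * X @ Y with beta = 0
    a = x.T if transpose_x else x
    b = y.T if transpose_y else y
    return alpha * (a @ b)
```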
@@ -188,7 +187,7 @@ class MatMulGradKernel : public framework::OpKernel<T> {
     auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a);
     auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b);
     blas.MatMul(a, mat_dim_a, b, mat_dim_b,
-                static_cast<T>(context.Attr<float>("scale")), out, T(0));
+                static_cast<T>(context.Attr<float>("alpha")), out, T(0));
   }

   void CalcInputGrad(const framework::ExecutionContext &context,
@@ -337,8 +336,7 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
              R"DOC(If true, use the transpose of `Y`.
 )DOC")
         .SetDefault(false);
-    AddAttr<float>("scale", "Scale").SetDefault(1.0f);
-    AddAttr<float>("bias", "Bias").SetDefault(0.0f);
+    AddAttr<float>("alpha", "The scale of Out").SetDefault(1.0f);
     AddComment(R"DOC(
 MatMul Operator.
...
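With the additive bias gone, the backward pass only has to propagate the alpha factor; a hedged NumPy sketch for the plain 2-D, non-transposed case (the helper name is made up):

```python
import numpy as np

def matmul_grad(x, y, dout, alpha=1.0):
    # for Out = alpha * X @ Y: dX = alpha * dOut @ Y^T, dY = alpha * X^T @ dOut
    dx = alpha * dout @ y.T
    dy = alpha * x.T @ dout
    return dx, dy
```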
@@ -53,6 +53,11 @@ $$Out = scale*X$$
     AddAttr<float>("scale", "The scaling factor of the scale operator.")
         .SetDefault(1.0);
     AddAttr<float>("bias", "The bias of the scale operator.").SetDefault(0.0);
+    AddAttr<bool>(
+        "bias_after_scale",
+        "Apply bias addition after or before scaling. It is useful for "
+        "numeric stability in some circumstances.")
+        .SetDefault(true);
   }
 };
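The two settings of the new attribute correspond to the following formulas; a NumPy reference of the attribute semantics (illustrative only, not the Eigen kernel):

```python
import numpy as np

def scale_op(x, scale=1.0, bias=0.0, bias_after_scale=True):
    # bias_after_scale=True:  Out = scale * X + bias   (previous behaviour)
    # bias_after_scale=False: Out = scale * (X + bias)
    return scale * x + bias if bias_after_scale else scale * (x + bias)
```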
@@ -82,6 +87,7 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
     grad_op->SetOutput("Out", InputGrad("X"));
     grad_op->SetAttr("scale", GetAttr("scale"));
     grad_op->SetAttr("bias", 0.0f);
+    grad_op->SetAttr("bias_after_scale", true);
     return std::unique_ptr<framework::OpDesc>(grad_op);
   }
 };
...
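Both orderings differ only by where the constant bias enters, so the gradient with respect to X is scale * dOut either way; that is why the generated grad op can hard-code bias = 0 and bias_after_scale = true. A tiny NumPy illustration (values are made up):

```python
import numpy as np

dout = np.random.rand(4).astype('float32')
scale = 2.0

# d/dx [scale * x + bias] == d/dx [scale * (x + bias)] == scale,
# so the backward pass can ignore the bias entirely
dx = scale * dout
```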
@@ -35,6 +35,7 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto scale = static_cast<T>(ctx.Attr<float>("scale"));
     auto bias = static_cast<T>(ctx.Attr<float>("bias"));
+    auto bias_after_scale = ctx.Attr<bool>("bias_after_scale");
     if (in_var->IsType<framework::SelectedRows>() && in_var != out_var) {
       auto& in_slr = in_var->Get<framework::SelectedRows>();
@@ -46,8 +47,11 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(dev) =
-        static_cast<T>(scale) * eigen_in + static_cast<T>(bias);
+    if (bias_after_scale) {
+      eigen_out.device(dev) = scale * eigen_in + bias;
+    } else {
+      eigen_out.device(dev) = scale * (eigen_in + bias);
+    }
   }
 };
...
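One way to read the new branch: adding the bias before scaling is algebraically the same as scaling the bias itself, which is where the numeric-stability remark in the attribute description comes from. A quick NumPy check (not part of the commit):

```python
import numpy as np

x = np.random.rand(3).astype('float32')
scale, bias = 0.1, 4.0

# bias_after_scale=False computes scale * (x + bias), i.e. the bias is
# effectively pre-scaled compared with the default scale * x + bias
assert np.allclose(scale * (x + bias), scale * x + scale * bias)
```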
@@ -3388,13 +3388,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
     return out


-def matmul(x,
-           y,
-           transpose_x=False,
-           transpose_y=False,
-           scale=1.0,
-           bias=0.0,
-           name=None):
+def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
     """
     Applies matrix multiplication to two tensors.
@@ -3428,8 +3422,7 @@ def matmul(x,
         y (Variable): The input variable which is a Tensor or LoDTensor.
         transpose_x (bool): Whether to transpose :math:`x` before multiplication.
         transpose_y (bool): Whether to transpose :math:`y` before multiplication.
-        scale (float): The scale of output. Default 1.0.
-        bias (float): The bias added to output. Default 0.0.
+        alpha (float): The scale of output. Default 1.0.
         name(str|None): A name for this layer(optional). If set None, the layer
             will be named automatically.
@@ -3500,8 +3493,7 @@ def matmul(x,
         attrs={
             'transpose_X': transpose_x,
             'transpose_Y': transpose_y,
-            'scale': scale,
-            'bias': bias
+            'alpha': alpha,
         })
     return out
...
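Callers of the Python layer that relied on the removed scale/bias arguments now get only the multiplicative alpha; any additive bias has to be applied separately after the matmul. A hedged NumPy sketch of the documented before/after semantics (illustrative values only):

```python
import numpy as np

x = np.random.rand(4, 5).astype('float32')
y = np.random.rand(5, 6).astype('float32')
alpha, bias = 2.0, 1.0

# old layer (as documented): out = scale * (x @ y) + bias
# new layer:                 out = alpha * (x @ y); add the bias afterwards if needed
out = alpha * (x @ y) + bias
```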