Unverified commit 8ede1605, authored by TeslaZhao, committed by GitHub

cherry-pick: Add double grad in Squeeze and Unsqueeze to release/1.8, … (#27843)

* cherry-pick: Add double grad in Squeeze and Unsqueeze to release/1.8, test=develop
Signed-off-by: TeslaZhao <zhaolisoftware@163.com>

* cherry-pick: Add double grad in Squeeze and Unsqueeze to release/1.8, test=develop

Parent: 64cdcd52
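Editor's note: squeeze and unsqueeze are pure reshapes, so the gradient of squeeze is a reshape back to X's shape, and the gradient of that gradient is a squeeze again — which is exactly what the new DoubleGradOpMaker classes below register. A minimal NumPy sketch of the shape chain (an illustration, not part of the patch; shapes match the new tests, and Paddle's `X@GRAD` naming convention is used in the comments):

```python
import numpy as np

# Forward: squeeze axes [0, 2], as in TestSqueezeDoubleGradCheck below.
x = np.random.rand(1, 3, 1, 40)
out = np.squeeze(x, axis=(0, 2))            # squeeze:      X -> Out, shape [3, 40]

# First-order grad: reshape Out@GRAD back to X's shape.
out_grad = np.random.rand(*out.shape)
x_grad = out_grad.reshape(x.shape)          # squeeze_grad: Out@GRAD -> X@GRAD

# Double grad: squeezing X@GRAD@GRAD yields Out@GRAD@GRAD -- the op
# wired up by SqueezeDoubleGradOpMaker is simply "squeeze" again.
x_grad_grad = np.random.rand(*x.shape)
out_grad_grad = np.squeeze(x_grad_grad, axis=(0, 2))
assert out_grad_grad.shape == out.shape
```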
@@ -13,61 +13,35 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/squeeze_op.h"
 #include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"

 namespace paddle {
 namespace operators {

-class SqueezeOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  void InferShape(framework::InferShapeContext *ctx) const override {
-    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Squeeze");
-    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Squeeze");
-
-    const auto &x_dims = ctx->GetInputDim("X");
-    // Check input tensor dims (<6) Eigen limit.
-    PADDLE_ENFORCE_LE(x_dims.size(), 6,
-                      platform::errors::InvalidArgument(
-                          "The dimensions of Input(X) "
-                          "should be in the range of [1, 6] (Eigen limit)."
-                          "But received X's dimensions = %d, X's shape=[%s].",
-                          x_dims.size(), x_dims));
-
-    const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
-    auto out_dims = GetOutputShape(axes, x_dims);
-    ctx->SetOutputDim("Out", out_dims);
-    if (x_dims[0] == out_dims[0]) {
-      // Only pass LoD when the first dimension of output and Input(X)
-      // are the same.
-      ctx->ShareLoD("X", "Out");
-    }
-  }
-
-  static framework::DDim GetOutputShape(const std::vector<int> squeeze_dims,
-                                        const framework::DDim &in_dims) {
-    size_t num_squeeze_dims = squeeze_dims.size();
-    int cnt_squeezed_dims = 0;
-    bool should_squeeze[9] = {false};
-
-    // Determines number of dimensions of output tensor after squeeze.
-    // Mark and count the dimensions need to be squeezed
-    if (num_squeeze_dims == 0) {
-      for (int idx = 0; idx < in_dims.size(); ++idx) {
-        if (in_dims[idx] == 1) {
-          should_squeeze[idx] = true;
-          ++cnt_squeezed_dims;
-        }
-      }
-    } else {
-      for (size_t idx = 0; idx < num_squeeze_dims; ++idx) {
-        int current = squeeze_dims[idx] < 0 ? squeeze_dims[idx] + in_dims.size()
-                                            : squeeze_dims[idx];
+framework::DDim GetOutputShape(const std::vector<int> squeeze_dims,
+                               const framework::DDim &in_dims,
+                               bool is_runtime) {
+  size_t num_squeeze_dims = squeeze_dims.size();
+  std::vector<bool> should_squeeze(in_dims.size(), false);
+
+  // Mark dimensions need to be squeezed.
+  if (num_squeeze_dims == 0) {
+    for (int i = 0; i < in_dims.size(); ++i) {
+      if (in_dims[i] == 1) {
+        should_squeeze[i] = true;
+      }
+    }
+  } else {
+    for (size_t i = 0; i < num_squeeze_dims; ++i) {
+      int current = squeeze_dims[i] < 0 ? squeeze_dims[i] + in_dims.size()
+                                        : squeeze_dims[i];
       PADDLE_ENFORCE_GE(
           current, 0,
           platform::errors::InvalidArgument(
@@ -81,22 +55,56 @@ class SqueezeOp : public framework::OperatorWithKernel {
             "But current axis is:%d, input tensor's shape = [%s].",
             -in_dims.size(), in_dims.size() - 1, current, in_dims));

-      if (!(should_squeeze[current])) {
-        ++cnt_squeezed_dims;
-        should_squeeze[current] = true;
+      if (!should_squeeze[current]) {
+        if (is_runtime) {
+          // At run time, dim of 1 is allowed to squeeze
+          if (in_dims[current] == 1) {
+            should_squeeze[current] = true;
+          }
+        } else {
+          // At compile time, dim of -1 or 1 is allowed to squeeze
+          if (in_dims[current] == 1 || in_dims[current] == -1) {
+            should_squeeze[current] = true;
+          }
+        }
       }
     }
+  }

-    // Make output dimensions
-    std::vector<int64_t> output_shape(in_dims.size() - cnt_squeezed_dims, 0);
-    for (int in_idx = 0, out_idx = 0; in_idx < in_dims.size(); ++in_idx) {
-      if (!should_squeeze[in_idx]) {
-        output_shape[out_idx++] = in_dims[in_idx];
-      }
+  // Make output dimensions
+  std::vector<int64_t> output_shape;
+  for (int i = 0; i < in_dims.size(); ++i) {
+    if (!should_squeeze[i]) {
+      output_shape.push_back(in_dims[i]);
     }
-    return framework::make_ddim(output_shape);
   }
+  return framework::make_ddim(output_shape);
+}
+
+class SqueezeOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Squeeze");
+    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Squeeze");
+
+    const auto &x_dims = ctx->GetInputDim("X");
+    // Check input tensor dims (<6) Eigen limit.
+    PADDLE_ENFORCE_LE(x_dims.size(), 6,
+                      platform::errors::InvalidArgument(
+                          "The dimensions of Input(X) "
+                          "should be in the range of [1, 6] (Eigen limit)."
+                          "But received X's dimensions = %d, X's shape=[%s].",
+                          x_dims.size(), x_dims));
+
+    const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
+    auto out_dims = GetOutputShape(axes, x_dims, false);
+    ctx->SetOutputDim("Out", out_dims);
+    if (x_dims[0] == out_dims[0]) {
+      // Only pass LoD when the first dimension of output and Input(X)
+      // are the same.
+      ctx->ShareLoD("X", "Out");
+    }
+  }

  protected:
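Editor's note: a hypothetical Python mirror of the refactored GetOutputShape above (names and the function itself are stand-ins, not Paddle API). It illustrates why the new is_runtime flag exists: at compile time a dimension may still be unknown (-1) and is allowed to squeeze, while at run time only a literal 1 may be squeezed.

```python
def get_output_shape(squeeze_dims, in_dims, is_runtime):
    rank = len(in_dims)
    should_squeeze = [False] * rank
    if not squeeze_dims:
        # No axes given: squeeze every dimension that is exactly 1.
        for i, d in enumerate(in_dims):
            if d == 1:
                should_squeeze[i] = True
    else:
        for axis in squeeze_dims:
            current = axis + rank if axis < 0 else axis  # wrap negative axes
            assert 0 <= current < rank, "axis out of range"
            if not should_squeeze[current]:
                if is_runtime:
                    # At run time, only a concrete dim of 1 may be squeezed.
                    if in_dims[current] == 1:
                        should_squeeze[current] = True
                else:
                    # At compile time, an unknown dim (-1) may also be squeezed.
                    if in_dims[current] in (1, -1):
                        should_squeeze[current] = True
    return [d for i, d in enumerate(in_dims) if not should_squeeze[i]]

# e.g. get_output_shape([0, 2], [1, 3, -1, 40], is_runtime=False) == [3, 40]
```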
@@ -183,7 +191,7 @@ class Squeeze2Op : public framework::OperatorWithKernel {
     const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
-    auto out_dims = SqueezeOp::GetOutputShape(axes, x_dims);
+    auto out_dims = GetOutputShape(axes, x_dims, false);
     ctx->SetOutputDim("Out", out_dims);
     if (x_dims[0] == out_dims[0]) {
       // Only pass LoD when the first dimension of output and Input(X)
@@ -241,6 +249,19 @@ class Squeeze2GradOp : public framework::OperatorWithKernel {
   }
 };

+template <typename T>
+class SqueezeDoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+  void Apply(GradOpPtr<T> grad_op) const override {
+    grad_op->SetType("squeeze");
+    grad_op->SetInput("X", this->OutputGrad(framework::GradVarName("X")));
+    grad_op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out")));
+    grad_op->SetAttrMap(this->Attrs());
+  }
+};
+
 // FIXME(zcd): squeeze2 adds an intermediate output(XShape) based on squeeze,
 // the XShape is used to carry the shape and lod of X which will be used in
 // squeeze_grad, in this way, the framework can reuse the memory of X
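Editor's note: the FIXME above is why Squeeze2DoubleGradOpMaker (next hunk) forwards XShape. squeeze2 records X's shape in a dummy XShape output with a leading 0, so squeeze2_grad can recover the shape without keeping X's buffer alive. A tiny sketch of that convention (assuming the leading-zero encoding used by Squeeze2Op's shape inference):

```python
x_shape = [1, 3, 1, 40]
xshape_dims = [0] + x_shape      # what squeeze2 records: [0, 1, 3, 1, 40]
recovered = xshape_dims[1:]      # what squeeze2_grad reads back
assert recovered == x_shape
```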
@@ -271,11 +292,25 @@ class Squeeze2GradOpMaker : public framework::SingleGradOpMaker<T> {
   }
 };

+template <typename T>
+class Squeeze2DoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+  void Apply(GradOpPtr<T> grad_op) const override {
+    grad_op->SetType("squeeze2");
+    grad_op->SetInput("X", this->OutputGrad(framework::GradVarName("X")));
+    grad_op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out")));
+    grad_op->SetOutput("XShape", this->Input("XShape"));
+    grad_op->SetAttrMap(this->Attrs());
+  }
+};
+
-DECLARE_INPLACE_OP_INFERER(SequeezeInplaceInferer, {"X", "Out"});
-DECLARE_INPLACE_OP_INFERER(SequeezeGradInplaceInferer,
+DECLARE_INPLACE_OP_INFERER(SqueezeInplaceInferer, {"X", "Out"});
+DECLARE_INPLACE_OP_INFERER(SqueezeGradInplaceInferer,
                            {framework::GradVarName("Out"),
                             framework::GradVarName("X")});
-DECLARE_NO_NEED_BUFFER_VARS_INFERER(SqueezeGradNoNeedBufferVarsInference, "X");
+DECLARE_NO_NEED_BUFFER_VARS_INFERER(SqueezeGradNoNeedBufferVarsInferer, "X");

 }  // namespace operators
 }  // namespace paddle
@@ -284,18 +319,23 @@ REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
                   ops::SqueezeGradOpMaker<paddle::framework::OpDesc>,
                   ops::SqueezeGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp,
-                  ops::SqueezeGradNoNeedBufferVarsInference);
+                  ops::SqueezeDoubleGradOpMaker<paddle::framework::OpDesc>,
+                  ops::SqueezeDoubleGradOpMaker<paddle::imperative::OpBase>,
+                  ops::SqueezeGradNoNeedBufferVarsInferer);
 REGISTER_OPERATOR(squeeze2, ops::Squeeze2Op, ops::Squeeze2OpMaker,
                   ops::Squeeze2GradOpMaker<paddle::framework::OpDesc>,
                   ops::Squeeze2GradOpMaker<paddle::imperative::OpBase>,
-                  ops::SequeezeInplaceInferer);
+                  ops::SqueezeInplaceInferer);
 REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp,
-                  ops::SequeezeGradInplaceInferer);
+                  ops::Squeeze2DoubleGradOpMaker<paddle::framework::OpDesc>,
+                  ops::Squeeze2DoubleGradOpMaker<paddle::imperative::OpBase>,
+                  ops::SqueezeGradInplaceInferer);

 REGISTER_OP_CPU_KERNEL(
     squeeze, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SqueezeKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SqueezeKernel<paddle::platform::CPUDeviceContext, bool>,
     ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int>,
     ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int64_t>);
@@ -303,12 +343,14 @@ REGISTER_OP_CPU_KERNEL(
     squeeze_grad,
     ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, bool>,
     ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     squeeze2, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, float>,
     ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, double>,
+    ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, bool>,
     ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int>,
     ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int64_t>);
@@ -316,6 +358,7 @@ REGISTER_OP_CPU_KERNEL(
     squeeze2_grad,
     ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, bool>,
     ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int8_t>,
     ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int64_t>);
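Editor's note: the unsqueeze file below is the mirror image. unsqueeze_grad reshapes back down to X's shape, so unsqueeze's double grad is unsqueeze itself. The same NumPy-style sanity check (an illustration, not part of the patch) for the shapes used in TestUnsqueezeDoubleGradCheck:

```python
import numpy as np

x = np.random.rand(3, 40)
out = np.expand_dims(np.expand_dims(x, 1), 2)    # unsqueeze axes [1, 2] -> [3, 1, 1, 40]

x_grad_grad = np.random.rand(*x.shape)           # X@GRAD@GRAD
out_grad_grad = np.expand_dims(np.expand_dims(x_grad_grad, 1), 2)  # Out@GRAD@GRAD
assert out_grad_grad.shape == out.shape == (3, 1, 1, 40)
```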
@@ -226,6 +226,19 @@ class UnsqueezeGradOpMaker : public framework::SingleGradOpMaker<T> {
   }
 };

+template <typename T>
+class UnsqueezeDoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+  void Apply(GradOpPtr<T> grad_op) const override {
+    grad_op->SetType("unsqueeze");
+    grad_op->SetInput("X", this->OutputGrad(framework::GradVarName("X")));
+    grad_op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out")));
+    grad_op->SetAttrMap(this->Attrs());
+  }
+};
+
 // FIXME(zcd): unsqueeze2 adds an intermediate output(XShape) based on
 // unsqueeze, the XShape is used to carry the shape and lod of X which
 // will be used in unsqueeze_grad, in this way, the framework can reuse
@@ -302,12 +315,25 @@ class Unsqueeze2GradOp : public framework::OperatorWithKernel {
   }
 };

+template <typename T>
+class Unsqueeze2DoubleGradOpMaker : public framework::SingleGradOpMaker<T> {
+ public:
+  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
+
+  void Apply(GradOpPtr<T> grad_op) const override {
+    grad_op->SetType("unsqueeze2");
+    grad_op->SetInput("X", this->OutputGrad(framework::GradVarName("X")));
+    grad_op->SetOutput("Out", this->InputGrad(framework::GradVarName("Out")));
+    grad_op->SetOutput("XShape", this->Input("XShape"));
+    grad_op->SetAttrMap(this->Attrs());
+  }
+};
+
 DECLARE_INPLACE_OP_INFERER(UnsqueezeInplaceInferer, {"X", "Out"});
 DECLARE_INPLACE_OP_INFERER(UnsqueezeGradInplaceInferer,
                            {framework::GradVarName("Out"),
                             framework::GradVarName("X")});
-DECLARE_NO_NEED_BUFFER_VARS_INFERER(UnsqueezeGradOpNoNeedBufferVarInference,
-                                    "X");
+DECLARE_NO_NEED_BUFFER_VARS_INFERER(UnsqueezeGradOpNoNeedBufferVarInferer, "X");

 }  // namespace operators
 }  // namespace paddle
@@ -316,13 +342,17 @@ REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
                   ops::UnsqueezeGradOpMaker<paddle::framework::OpDesc>,
                   ops::UnsqueezeGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp,
-                  ops::UnsqueezeGradOpNoNeedBufferVarInference);
+                  ops::UnsqueezeDoubleGradOpMaker<paddle::framework::OpDesc>,
+                  ops::UnsqueezeDoubleGradOpMaker<paddle::imperative::OpBase>,
+                  ops::UnsqueezeGradOpNoNeedBufferVarInferer);
 REGISTER_OPERATOR(unsqueeze2, ops::Unsqueeze2Op, ops::Unsqueeze2OpMaker,
                   ops::Unsqueeze2GradOpMaker<paddle::framework::OpDesc>,
                   ops::Unsqueeze2GradOpMaker<paddle::imperative::OpBase>,
                   ops::UnsqueezeInplaceInferer);
 REGISTER_OPERATOR(unsqueeze2_grad, ops::Unsqueeze2GradOp,
+                  ops::Unsqueeze2DoubleGradOpMaker<paddle::framework::OpDesc>,
+                  ops::Unsqueeze2DoubleGradOpMaker<paddle::imperative::OpBase>,
                   ops::UnsqueezeGradInplaceInferer);
 REGISTER_OP_CPU_KERNEL(
......
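Editor's note: the new tests below rely on gradient_checker.double_grad_check, which appends the first-order gradient graph to the program and then finite-differences that graph against the analytic second-order gradients. The principle in miniature, with a scalar toy function (an illustration, not Paddle code):

```python
import numpy as np

f = lambda x: x ** 3        # toy forward
g = lambda x: 3 * x ** 2    # analytic first-order grad
gg = lambda x: 6 * x        # analytic double grad

x0, eps = 0.7, 1e-5
# Central finite difference of the *gradient* graph approximates the double grad.
numeric_gg = (g(x0 + eps) - g(x0 - eps)) / (2 * eps)
assert np.isclose(numeric_gg, gg(x0), rtol=1e-6)
```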
@@ -21,7 +21,6 @@ import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle.fluid.core as core
 import gradient_checker
 from decorator_helper import prog_scope
@@ -232,5 +231,53 @@ class TestExpandDoubleGradCheck(unittest.TestCase):
         self.func(p)


+class TestSqueezeDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        x_shape = [1, 3, 1, 40]
+        axes = [0, 2]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', x_shape, False, dtype)
+        x.persistable = True
+        out = layers.squeeze(x, axes)
+        x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x], out, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestUnsqueezeDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        x_shape = [3, 40]
+        axes = [1, 2]
+        eps = 0.005
+        dtype = np.float64
+
+        x = layers.data('x', x_shape, False, dtype)
+        x.persistable = True
+        out = layers.unsqueeze(x, axes)
+        x_arr = np.random.uniform(-1, 1, x_shape).astype(dtype)
+
+        gradient_checker.double_grad_check(
+            [x], out, x_init=x_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == "__main__":
     unittest.main()
@@ -18,6 +18,7 @@ import unittest
 import numpy as np

 from op_test import OpTest
+import paddle

 # Correct: General.
......
@@ -18,6 +18,7 @@ import unittest
 import numpy as np

 import paddle.fluid as fluid
 from op_test import OpTest
+import paddle

 # Correct: General.
......