Commit 256d6a33 authored by: F fengjiayi

Add axis for rowwise_add_op

Parent e168fc44
@@ -291,5 +291,9 @@ DDim flatten_to_2d(const DDim& src, int num_row_dims) {
       static_cast<int>(product(slice_ddim(src, rank - num_row_dims, rank)))});
 }
 
+DDim flatten_to_1d(const DDim& src) {
+  return make_ddim({static_cast<int>(product(src))});
+}
+
 }  // namespace framework
 }  // namespace paddle
@@ -117,6 +117,8 @@ std::ostream& operator<<(std::ostream&, const DDim&);
 
 DDim flatten_to_2d(const DDim& src, int num_row_dims);
 
+DDim flatten_to_1d(const DDim& src);
+
 }  // namespace framework
 }  // namespace paddle
...
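The new `flatten_to_1d` helper complements the existing `flatten_to_2d`: the former collapses every dimension into one, while the latter keeps the product of the trailing `num_row_dims` dimensions as the second axis. A minimal sketch of the resulting shapes; the `paddle/framework/ddim.h` include path is an assumption, not part of this patch:

```cpp
// Illustration only; the include path below is assumed, not taken from the patch.
#include <iostream>
#include "paddle/framework/ddim.h"

int main() {
  using namespace paddle::framework;
  DDim d = make_ddim({13, 6, 7, 8});
  // Fold the trailing 2 dims into the second axis: {13 * 6, 7 * 8} == {78, 56}.
  std::cout << flatten_to_2d(d, /*num_row_dims=*/2) << std::endl;
  // Collapse everything into a single dim: {13 * 6 * 7 * 8} == {4368}.
  std::cout << flatten_to_1d(d) << std::endl;
  return 0;
}
```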
@@ -71,6 +71,15 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
     return EigenMatrix::From(tensor,
                              flatten_to_2d(tensor.dims(), num_row_dims));
   }
+
+  static typename EigenMatrix::ConstType Reshape(const Tensor& tensor,
+                                                 int num_row_dims) {
+    int rank = tensor.dims_.size();
+    PADDLE_ENFORCE(num_row_dims > 0 && num_row_dims < rank,
+                   "`num_row_dims` must be between (0, rank_of_tensor).");
+    return EigenMatrix::From(tensor,
+                             flatten_to_2d(tensor.dims(), num_row_dims));
+  }
 };
 
 template <typename T, int MajorType = Eigen::RowMajor,
@@ -78,13 +87,11 @@ template <typename T, int MajorType = Eigen::RowMajor,
 struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
   // Flatten reshapes a Tensor into an EigenVector.
   static typename EigenVector::Type Flatten(Tensor& tensor) {
-    return EigenVector::From(
-        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+    return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
   }
 
   static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
-    return EigenVector::From(
-        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+    return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
   }
 };
...
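The added `Reshape` overload lets a kernel view a higher-rank `Tensor` as a 2-D Eigen matrix without copying, folding the trailing `num_row_dims` dimensions into the column axis. A rough usage sketch under stated assumptions (the header paths and `paddle::platform::CPUPlace` are guesses from context, not part of this patch):

```cpp
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"

void ReshapeExample() {
  using namespace paddle::framework;
  Tensor t;
  t.Resize(make_ddim({13, 6, 7, 8}));
  t.mutable_data<float>(paddle::platform::CPUPlace());  // place type is assumed
  // With num_row_dims == 2, the view has shape [13 * 6, 7 * 8] == [78, 56].
  auto mat = EigenMatrix<float>::Reshape(t, 2);
  // mat.dimension(0) == 78, mat.dimension(1) == 56
}
```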
@@ -25,14 +25,19 @@ class RowwiseAddOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto dim0 = ctx.Input<Tensor>("X")->dims();
-    auto dim1 = ctx.Input<Tensor>("b")->dims();
-    PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
-    PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
-    PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
-    PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1");
-    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("X")->dims());
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto b_dims = ctx.Input<Tensor>("b")->dims();
+    PADDLE_ENFORCE_GT(
+        x_dims.size(), b_dims.size(),
+        "The rank of input `X` must be larger than the one of input `b`.");
+
+    int num_row_dims = b_dims.size();
+
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                          x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                      b_dims, "The width of two operands must be same");
+    PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1");
+    ctx.Output<Tensor>("Out")->Resize(x_dims);
   }
 };
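The relaxed `InferShape` no longer forces `X` to be a matrix and `b` a vector; it only requires that `X` have higher rank than `b` and that the trailing dimensions of `X` equal `b`'s dimensions. A worked example of the new condition, using the shapes from the test case added below (the helper function and the `DDim` equality comparison are assumptions for illustration):

```cpp
#include "paddle/framework/ddim.h"

// Illustration of the check performed by RowwiseAddOp::InferShape above.
bool ShapesCompatible() {
  using namespace paddle::framework;
  auto x_dims = make_ddim({13, 6, 7, 8});
  auto b_dims = make_ddim({7, 8});
  int num_row_dims = b_dims.size();  // 2
  // slice_ddim(x_dims, 4 - 2, 4) yields [7, 8], which must equal b_dims.
  return x_dims.size() > b_dims.size() &&
         slice_ddim(x_dims, x_dims.size() - num_row_dims, x_dims.size()) ==
             b_dims;
}
```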
@@ -61,13 +66,20 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), "b should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto dims0 = ctx.Input<Tensor>("X")->dims();
-    auto dims1 = ctx.Input<Tensor>("b")->dims();
-    PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1")
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto b_dims = ctx.Input<Tensor>("b")->dims();
+    PADDLE_ENFORCE_GT(
+        x_dims.size(), b_dims.size(),
+        "The rank of input `X` must be larger than the one of input `b`.");
+
+    int num_row_dims = b_dims.size();
+
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                          x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                      b_dims, "The width of two operands must be same");
     auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
-    if (dx) dx->Resize(dims0);
-    if (db) db->Resize(dims1);
+    if (dx) dx->Resize(x_dims);
+    if (db) db->Resize(b_dims);
   }
 };
...
@@ -33,10 +33,11 @@ class RowwiseAddKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext& context) const override {
     auto out = context.Output<Tensor>("Out");
     out->mutable_data<T>(context.GetPlace());
-    auto input = EigenMatrix<T>::From(*context.Input<Tensor>("X"));
-    auto bias = EigenVector<T>::From(*context.Input<Tensor>("b"));
-    auto output = EigenMatrix<T>::From(*out);
+    int num_row_dims = context.Input<Tensor>("b")->dims().size();
+    auto input =
+        EigenMatrix<T>::Reshape(*context.Input<Tensor>("X"), num_row_dims);
+    auto bias = EigenVector<T>::Flatten(*context.Input<Tensor>("b"));
+    auto output = EigenMatrix<T>::Reshape(*out, num_row_dims);
 
     const int bias_size = bias.dimension(0);
     const int rest_size = input.size() / bias_size;
@@ -54,12 +55,14 @@ class RowwiseAddGradKernel : public framework::OpKernel {
     auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
     auto* db = context.Output<Tensor>(framework::GradVarName("b"));
+    int num_row_dims = context.Input<Tensor>("b")->dims().size();
 
-    auto out_grad = EigenMatrix<T>::From(*dout);
+    auto out_grad = EigenMatrix<T>::Reshape(*dout, num_row_dims);
     auto place = context.GetEigenDevice<Place>();
 
     if (dx) {
       dx->mutable_data<T>(context.GetPlace());
-      EigenMatrix<T>::From(*dx).device(place) = out_grad;
+      EigenMatrix<T>::Reshape(*dx, num_row_dims).device(place) = out_grad;
     }
     if (db) {
...
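After this change both kernels work on 2-D views: `X`, `Out`, and `Out@GRAD` are reshaped to `[rest_size, bias_size]` while `b` is flattened to `[bias_size]`, so the bias is applied to every flattened row regardless of the rank of `X`. A plain-loop sketch of the forward semantics, independent of Eigen and intended only as a reference:

```cpp
#include <cstddef>
#include <vector>

// Reference loop for illustration; not the actual kernel.
std::vector<float> RowwiseAddReference(const std::vector<float>& x,
                                       const std::vector<float>& b) {
  const std::size_t bias_size = b.size();
  const std::size_t rest_size = x.size() / bias_size;
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < rest_size; ++i) {
    for (std::size_t j = 0; j < bias_size; ++j) {
      // Each flattened row of x gets the same bias vector added.
      out[i * bias_size + j] = x[i * bias_size + j] + b[j];
    }
  }
  return out;
}
```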
@@ -16,6 +16,18 @@ class TestRowwiseAddOp(unittest.TestCase):
         self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
 
 
+class TestRowwiseAddOp2(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "rowwise_add"
+        self.inputs = {
+            'X': np.random.random((13, 6, 7, 8)).astype("float32"),
+            'b': np.random.random((7, 8)).astype("float32")
+        }
+        self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
+
+
 class TestRowwiseAddGradOp(GradientChecker):
     def setUp(self):
         self.op = create_op("rowwise_add")
@@ -34,5 +46,23 @@ class TestRowwiseAddGradOp(GradientChecker):
         self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
 
 
+class TestRowwiseAddGradOp2(GradientChecker):
+    def setUp(self):
+        self.op = create_op("rowwise_add")
+        self.inputs = {
+            "X": np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"),
+            "b": np.random.uniform(0.1, 1, [2, 5]).astype("float32")
+        }
+
+    def test_normal(self):
+        self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
+
+    def test_ignore_b(self):
+        self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
+
+    def test_ignore_x(self):
+        self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
+
+
 if __name__ == '__main__':
     unittest.main()