Commit 256d6a33 authored by: F fengjiayi

Add axis for rowwise_add_op

Parent e168fc44
@@ -291,5 +291,9 @@ DDim flatten_to_2d(const DDim& src, int num_row_dims) {
       static_cast<int>(product(slice_ddim(src, rank - num_row_dims, rank)))});
 }
 
+DDim flatten_to_1d(const DDim& src) {
+  return make_ddim({static_cast<int>(product(src))});
+}
+
 }  // namespace framework
 }  // namespace paddle
@@ -117,6 +117,8 @@ std::ostream& operator<<(std::ostream&, const DDim&);
 
 DDim flatten_to_2d(const DDim& src, int num_row_dims);
 
+DDim flatten_to_1d(const DDim& src);
+
 }  // namespace framework
 }  // namespace paddle
...
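The new `flatten_to_1d` helper complements the existing `flatten_to_2d`: the former collapses every dimension into one, while the latter keeps the product of the trailing `num_row_dims` dimensions as the second axis. A minimal sketch of the resulting shapes; the `paddle/framework/ddim.h` include path is an assumption, not part of this patch:

```cpp
// Illustration only; the include path below is assumed, not taken from the patch.
#include <iostream>
#include "paddle/framework/ddim.h"

int main() {
  using namespace paddle::framework;
  DDim d = make_ddim({13, 6, 7, 8});
  // Fold the trailing 2 dims into the second axis: {13 * 6, 7 * 8} == {78, 56}.
  std::cout << flatten_to_2d(d, /*num_row_dims=*/2) << std::endl;
  // Collapse everything into a single dim: {13 * 6 * 7 * 8} == {4368}.
  std::cout << flatten_to_1d(d) << std::endl;
  return 0;
}
```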
@@ -71,6 +71,15 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
     return EigenMatrix::From(tensor,
                              flatten_to_2d(tensor.dims(), num_row_dims));
   }
+
+  static typename EigenMatrix::ConstType Reshape(const Tensor& tensor,
+                                                 int num_row_dims) {
+    int rank = tensor.dims_.size();
+    PADDLE_ENFORCE(num_row_dims > 0 && num_row_dims < rank,
+                   "`num_row_dims` must be between (0, rank_of_tensor).");
+    return EigenMatrix::From(tensor,
+                             flatten_to_2d(tensor.dims(), num_row_dims));
+  }
 };
 
 template <typename T, int MajorType = Eigen::RowMajor,
@@ -78,13 +87,11 @@ template <typename T, int MajorType = Eigen::RowMajor,
 struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
   // Flatten reshapes a Tensor into an EigenVector.
   static typename EigenVector::Type Flatten(Tensor& tensor) {
-    return EigenVector::From(
-        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+    return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
   }
 
   static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
-    return EigenVector::From(
-        tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+    return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
   }
 };
...
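The added `Reshape` overload lets a kernel view a higher-rank `Tensor` as a 2-D Eigen matrix without copying, folding the trailing `num_row_dims` dimensions into the column axis. A rough usage sketch under stated assumptions (the header paths and `paddle::platform::CPUPlace` are guesses from context, not part of this patch):

```cpp
#include "paddle/framework/eigen.h"
#include "paddle/framework/tensor.h"

void ReshapeExample() {
  using namespace paddle::framework;
  Tensor t;
  t.Resize(make_ddim({13, 6, 7, 8}));
  t.mutable_data<float>(paddle::platform::CPUPlace());  // place type is assumed
  // With num_row_dims == 2, the view has shape [13 * 6, 7 * 8] == [78, 56].
  auto mat = EigenMatrix<float>::Reshape(t, 2);
  // mat.dimension(0) == 78, mat.dimension(1) == 56
}
```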
@@ -25,14 +25,19 @@ class RowwiseAddOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(const framework::InferShapeContext &ctx) const override {
-    auto dim0 = ctx.Input<Tensor>("X")->dims();
-    auto dim1 = ctx.Input<Tensor>("b")->dims();
-    PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
-    PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
-    PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
-    PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1");
-    ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("X")->dims());
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto b_dims = ctx.Input<Tensor>("b")->dims();
+    PADDLE_ENFORCE_GT(
+        x_dims.size(), b_dims.size(),
+        "The rank of input `X` must be larger than the one of input `b`.");
+
+    int num_row_dims = b_dims.size();
+
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                          x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                      b_dims, "The width of two operands must be same");
+    PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1");
+    ctx.Output<Tensor>("Out")->Resize(x_dims);
   }
 };
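The relaxed `InferShape` no longer forces `X` to be a matrix and `b` a vector; it only requires that `X` have higher rank than `b` and that the trailing dimensions of `X` equal `b`'s dimensions. A worked example of the new condition, using the shapes from the test case added below (the helper function and the `DDim` equality comparison are assumptions for illustration):

```cpp
#include "paddle/framework/ddim.h"

// Illustration of the check performed by RowwiseAddOp::InferShape above.
bool ShapesCompatible() {
  using namespace paddle::framework;
  auto x_dims = make_ddim({13, 6, 7, 8});
  auto b_dims = make_ddim({7, 8});
  int num_row_dims = b_dims.size();  // 2
  // slice_ddim(x_dims, 4 - 2, 4) yields [7, 8], which must equal b_dims.
  return x_dims.size() > b_dims.size() &&
         slice_ddim(x_dims, x_dims.size() - num_row_dims, x_dims.size()) ==
             b_dims;
}
```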
@@ -61,13 +66,20 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), "b should not be null");
     PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
                             "Input(Out@GRAD) should not be null");
-    auto dims0 = ctx.Input<Tensor>("X")->dims();
-    auto dims1 = ctx.Input<Tensor>("b")->dims();
-    PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1")
+    auto x_dims = ctx.Input<Tensor>("X")->dims();
+    auto b_dims = ctx.Input<Tensor>("b")->dims();
+    PADDLE_ENFORCE_GT(
+        x_dims.size(), b_dims.size(),
+        "The rank of input `X` must be larger than the one of input `b`.");
+
+    int num_row_dims = b_dims.size();
+
+    PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                          x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                      b_dims, "The width of two operands must be same");
     auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
-    if (dx) dx->Resize(dims0);
-    if (db) db->Resize(dims1);
+    if (dx) dx->Resize(x_dims);
+    if (db) db->Resize(b_dims);
   }
 };
...
@@ -33,10 +33,11 @@ class RowwiseAddKernel : public framework::OpKernel {
   void Compute(const framework::ExecutionContext& context) const override {
     auto out = context.Output<Tensor>("Out");
     out->mutable_data<T>(context.GetPlace());
-    auto input = EigenMatrix<T>::From(*context.Input<Tensor>("X"));
-    auto bias = EigenVector<T>::From(*context.Input<Tensor>("b"));
-    auto output = EigenMatrix<T>::From(*out);
+    int num_row_dims = context.Input<Tensor>("b")->dims().size();
+    auto input =
+        EigenMatrix<T>::Reshape(*context.Input<Tensor>("X"), num_row_dims);
+    auto bias = EigenVector<T>::Flatten(*context.Input<Tensor>("b"));
+    auto output = EigenMatrix<T>::Reshape(*out, num_row_dims);
 
     const int bias_size = bias.dimension(0);
     const int rest_size = input.size() / bias_size;
@@ -54,12 +55,14 @@ class RowwiseAddGradKernel : public framework::OpKernel {
     auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
     auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
     auto* db = context.Output<Tensor>(framework::GradVarName("b"));
+    int num_row_dims = context.Input<Tensor>("b")->dims().size();
 
-    auto out_grad = EigenMatrix<T>::From(*dout);
+    auto out_grad = EigenMatrix<T>::Reshape(*dout, num_row_dims);
     auto place = context.GetEigenDevice<Place>();
 
     if (dx) {
       dx->mutable_data<T>(context.GetPlace());
-      EigenMatrix<T>::From(*dx).device(place) = out_grad;
+      EigenMatrix<T>::Reshape(*dx, num_row_dims).device(place) = out_grad;
     }
     if (db) {
...
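After this change both kernels work on 2-D views: `X`, `Out`, and `Out@GRAD` are reshaped to `[rest_size, bias_size]` while `b` is flattened to `[bias_size]`, so the bias is applied to every flattened row regardless of the rank of `X`. A plain-loop sketch of the forward semantics, independent of Eigen and intended only as a reference:

```cpp
#include <cstddef>
#include <vector>

// Reference loop for illustration; not the actual kernel.
std::vector<float> RowwiseAddReference(const std::vector<float>& x,
                                       const std::vector<float>& b) {
  const std::size_t bias_size = b.size();
  const std::size_t rest_size = x.size() / bias_size;
  std::vector<float> out(x.size());
  for (std::size_t i = 0; i < rest_size; ++i) {
    for (std::size_t j = 0; j < bias_size; ++j) {
      // Each flattened row of x gets the same bias vector added.
      out[i * bias_size + j] = x[i * bias_size + j] + b[j];
    }
  }
  return out;
}
```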
@@ -16,6 +16,18 @@ class TestRowwiseAddOp(unittest.TestCase):
         self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
 
 
+class TestRowwiseAddOp2(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "rowwise_add"
+        self.inputs = {
+            'X': np.random.random((13, 6, 7, 8)).astype("float32"),
+            'b': np.random.random((7, 8)).astype("float32")
+        }
+        self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
+
+
 class TestRowwiseAddGradOp(GradientChecker):
     def setUp(self):
         self.op = create_op("rowwise_add")
@@ -34,5 +46,23 @@ class TestRowwiseAddGradOp(GradientChecker):
         self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
 
 
+class TestRowwiseAddGradOp2(GradientChecker):
+    def setUp(self):
+        self.op = create_op("rowwise_add")
+        self.inputs = {
+            "X": np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"),
+            "b": np.random.uniform(0.1, 1, [2, 5]).astype("float32")
+        }
+
+    def test_normal(self):
+        self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
+
+    def test_ignore_b(self):
+        self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
+
+    def test_ignore_x(self):
+        self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
+
+
 if __name__ == '__main__':
     unittest.main()