Commit 256d6a33 authored by fengjiayi

Add axis for rowwise_add_op

Parent e168fc44
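Previously, rowwise_add required a rank-2 `X` and a rank-1 `b`. With this commit, `b` may be any tensor whose dimensions equal the trailing dimensions of `X`, and it is added to every leading "row" of `X`. A minimal numpy sketch of the intended semantics (the same computation the new unit tests below compare against):

    import numpy as np

    # X may have any rank; b's shape must equal X's trailing dimensions.
    X = np.random.random((13, 6, 7, 8)).astype("float32")
    b = np.random.random((7, 8)).astype("float32")

    # rowwise_add broadcasts b over the leading dimensions of X.
    Out = np.add(X, b)  # the result the new unit tests expect
    assert Out.shape == X.shape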
......@@ -291,5 +291,9 @@ DDim flatten_to_2d(const DDim& src, int num_row_dims) {
static_cast<int>(product(slice_ddim(src, rank - num_row_dims, rank)))});
}
DDim flatten_to_1d(const DDim& src) {
return make_ddim({static_cast<int>(product(src))});
}
} // namespace framework
} // namespace paddle
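The new flatten_to_1d helper, together with the existing flatten_to_2d, implements the shape folding the operator relies on: the trailing num_row_dims dimensions collapse into a single column width and the remaining leading dimensions into a single row count. A rough Python model of that arithmetic (the Python helpers are illustrative only, not part of the C++ API):

    import numpy as np

    def flatten_to_2d(dims, num_row_dims):
        # Collapse the trailing `num_row_dims` dims into the column width
        # and everything in front of them into the row count.
        rank = len(dims)
        return (int(np.prod(dims[:rank - num_row_dims])),
                int(np.prod(dims[rank - num_row_dims:])))

    def flatten_to_1d(dims):
        return (int(np.prod(dims)),)

    assert flatten_to_2d((13, 6, 7, 8), 2) == (78, 56)  # 13*6 rows, 7*8 cols
    assert flatten_to_1d((7, 8)) == (56,)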
......@@ -117,6 +117,8 @@ std::ostream& operator<<(std::ostream&, const DDim&);
DDim flatten_to_2d(const DDim& src, int num_row_dims);
DDim flatten_to_1d(const DDim& src);
} // namespace framework
} // namespace paddle
......
......@@ -71,6 +71,15 @@ struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
return EigenMatrix::From(tensor,
flatten_to_2d(tensor.dims(), num_row_dims));
}
static typename EigenMatrix::ConstType Reshape(const Tensor& tensor,
int num_row_dims) {
int rank = tensor.dims_.size();
PADDLE_ENFORCE(num_row_dims > 0 && num_row_dims < rank,
"`num_row_dims` must be between (0, rank_of_tensor).");
return EigenMatrix::From(tensor,
flatten_to_2d(tensor.dims(), num_row_dims));
}
};
template <typename T, int MajorType = Eigen::RowMajor,
......@@ -78,13 +87,11 @@ template <typename T, int MajorType = Eigen::RowMajor,
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten reshapes a Tensor into an EigenVector.
static typename EigenVector::Type Flatten(Tensor& tensor) {
- return EigenVector::From(
-     tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+ return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
}
static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
- return EigenVector::From(
-     tensor, make_ddim({static_cast<int>(product(tensor.dims_))}));
+ return EigenVector::From(tensor, {static_cast<int>(product(tensor.dims_))});
}
};
......
......@@ -25,14 +25,19 @@ class RowwiseAddOp : public framework::OperatorWithKernel {
protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
- auto dim0 = ctx.Input<Tensor>("X")->dims();
- auto dim1 = ctx.Input<Tensor>("b")->dims();
- PADDLE_ENFORCE(dim0.size() == 2, "Input 0 must be matrix");
- PADDLE_ENFORCE(dim1.size() == 1, "The second input must be vector");
- PADDLE_ENFORCE(dim0[1] == dim1[0], "The width of two input must be same");
- PADDLE_ENFORCE(ctx.OutputSize("Out") == 1, "The output size must be 1");
- ctx.Output<Tensor>("Out")->Resize(ctx.Input<Tensor>("X")->dims());
+ auto x_dims = ctx.Input<Tensor>("X")->dims();
+ auto b_dims = ctx.Input<Tensor>("b")->dims();
+ PADDLE_ENFORCE_GT(
+     x_dims.size(), b_dims.size(),
+     "The rank of input `X` must be larger than the one of input `b`.");
+ int num_row_dims = b_dims.size();
+ PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                       x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                   b_dims, "The width of two operands must be same");
+ PADDLE_ENFORCE_EQ(ctx.OutputSize("Out"), 1, "The output size must be 1");
+ ctx.Output<Tensor>("Out")->Resize(x_dims);
}
};
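The old rank-2/rank-1 checks are replaced by two more general conditions: `b` must have strictly smaller rank than `X`, and `b`'s dimensions must equal the trailing dimensions of `X`; `Out` keeps the shape of `X`. A Python restatement of the check (the function name is illustrative):

    def check_rowwise_add_shapes(x_dims, b_dims):
        # Mirrors the PADDLE_ENFORCE_GT / PADDLE_ENFORCE_EQ calls in InferShape.
        assert len(x_dims) > len(b_dims), (
            "The rank of input `X` must be larger than the one of input `b`.")
        assert tuple(x_dims[len(x_dims) - len(b_dims):]) == tuple(b_dims), (
            "The width of two operands must be same")
        return tuple(x_dims)  # Out keeps X's shape

    assert check_rowwise_add_shapes((13, 6, 7, 8), (7, 8)) == (13, 6, 7, 8)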
......@@ -61,13 +66,20 @@ class RowwiseAddGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), "b should not be null");
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
"Input(Out@GRAD) should not be null");
- auto dims0 = ctx.Input<Tensor>("X")->dims();
- auto dims1 = ctx.Input<Tensor>("b")->dims();
- PADDLE_ENFORCE_EQ(1, dims1.size(), "b dims should be 1")
+ auto x_dims = ctx.Input<Tensor>("X")->dims();
+ auto b_dims = ctx.Input<Tensor>("b")->dims();
+ PADDLE_ENFORCE_GT(
+     x_dims.size(), b_dims.size(),
+     "The rank of input `X` must be larger than the one of input `b`.");
+ int num_row_dims = b_dims.size();
+ PADDLE_ENFORCE_EQ(framework::slice_ddim(
+                       x_dims, x_dims.size() - num_row_dims, x_dims.size()),
+                   b_dims, "The width of two operands must be same");
auto *dx = ctx.Output<Tensor>(framework::GradVarName("X"));
auto *db = ctx.Output<Tensor>(framework::GradVarName("b"));
- if (dx) dx->Resize(dims0);
- if (db) db->Resize(dims1);
+ if (dx) dx->Resize(x_dims);
+ if (db) db->Resize(b_dims);
}
};
......
......@@ -33,10 +33,11 @@ class RowwiseAddKernel : public framework::OpKernel {
void Compute(const framework::ExecutionContext& context) const override {
auto out = context.Output<Tensor>("Out");
out->mutable_data<T>(context.GetPlace());
- auto input = EigenMatrix<T>::From(*context.Input<Tensor>("X"));
- auto bias = EigenVector<T>::From(*context.Input<Tensor>("b"));
- auto output = EigenMatrix<T>::From(*out);
+ int num_row_dims = context.Input<Tensor>("b")->dims().size();
+ auto input =
+     EigenMatrix<T>::Reshape(*context.Input<Tensor>("X"), num_row_dims);
+ auto bias = EigenVector<T>::Flatten(*context.Input<Tensor>("b"));
+ auto output = EigenMatrix<T>::Reshape(*out, num_row_dims);
const int bias_size = bias.dimension(0);
const int rest_size = input.size() / bias_size;
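With num_row_dims set to the rank of `b`, the kernel views `X` and `Out` as (rest_size, bias_size) matrices and broadcasts the flattened bias over every row. A numpy sketch of the forward computation under that reading (not the actual Eigen code):

    import numpy as np

    def rowwise_add_forward(x, b):
        bias = b.reshape(-1)                  # EigenVector::Flatten(b)
        x2d = x.reshape(-1, bias.size)        # EigenMatrix::Reshape(X, num_row_dims)
        return (x2d + bias).reshape(x.shape)  # broadcast over rest_size rows

    x = np.random.random((13, 6, 7, 8)).astype("float32")
    b = np.random.random((7, 8)).astype("float32")
    assert np.allclose(rowwise_add_forward(x, b), x + b)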
......@@ -54,12 +55,14 @@ class RowwiseAddGradKernel : public framework::OpKernel {
auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
auto* db = context.Output<Tensor>(framework::GradVarName("b"));
+ int num_row_dims = context.Input<Tensor>("b")->dims().size();
- auto out_grad = EigenMatrix<T>::From(*dout);
+ auto out_grad = EigenMatrix<T>::Reshape(*dout, num_row_dims);
auto place = context.GetEigenDevice<Place>();
if (dx) {
dx->mutable_data<T>(context.GetPlace());
- EigenMatrix<T>::From(*dx).device(place) = out_grad;
+ EigenMatrix<T>::Reshape(*dx, num_row_dims).device(place) = out_grad;
}
if (db) {
......
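The gradient kernel applies the same reshape: `dX` is the upstream gradient passed through unchanged, while `db` (computed in the collapsed part of this hunk) reduces the upstream gradient over the folded leading dimensions. A hypothetical numpy model of that backward rule:

    import numpy as np

    def rowwise_add_backward(dout, b_shape):
        # dX passes the gradient through unchanged; db sums it over the
        # broadcast ("rest") rows and restores b's original shape.
        dx = dout
        db = dout.reshape(-1, int(np.prod(b_shape))).sum(axis=0).reshape(b_shape)
        return dx, db

    dout = np.ones((13, 6, 7, 8), dtype="float32")
    dx, db = rowwise_add_backward(dout, (7, 8))
    assert dx.shape == dout.shape and db.shape == (7, 8)
    assert np.allclose(db, 13 * 6)  # each bias element accumulates 78 rows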
......@@ -16,6 +16,18 @@ class TestRowwiseAddOp(unittest.TestCase):
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
class TestRowwiseAddOp2(unittest.TestCase):
__metaclass__ = OpTestMeta
def setUp(self):
self.type = "rowwise_add"
self.inputs = {
'X': np.random.random((13, 6, 7, 8)).astype("float32"),
'b': np.random.random((7, 8)).astype("float32")
}
self.outputs = {'Out': np.add(self.inputs['X'], self.inputs['b'])}
class TestRowwiseAddGradOp(GradientChecker):
def setUp(self):
self.op = create_op("rowwise_add")
......@@ -34,5 +46,23 @@ class TestRowwiseAddGradOp(GradientChecker):
self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
class TestRowwiseAddGradOp2(GradientChecker):
def setUp(self):
self.op = create_op("rowwise_add")
self.inputs = {
"X": np.random.uniform(0.1, 1, [2, 3, 2, 5]).astype("float32"),
"b": np.random.uniform(0.1, 1, [2, 5]).astype("float32")
}
def test_normal(self):
self.check_grad(self.op, self.inputs, ["X", "b"], "Out")
def test_ignore_b(self):
self.check_grad(self.op, self.inputs, ["X"], "Out", no_grad_set={"b"})
def test_ignore_x(self):
self.check_grad(self.op, self.inputs, ["b"], "Out", no_grad_set={"X"})
if __name__ == '__main__':
unittest.main()